Jaegeon Lee’s Master’s Thesis
Differences in Commuting Inflow Patterns among the Employment Districts in Seoul along with the Adoption of Work-from-Home during the COVID-19 Pandemic Period
R Setup
library(tidyverse)
library(DescTools)
library(kableExtra)
library(rgdal)
library(rgeos)
library(sf)
library(raster)
library(spdep)
library(tmap)
library(tmaptools)
library(cartogram)
library(viridisLite)
library(patchwork)
library(xts)
library(data.table)
library(od)
library(viridisLite)
library(DT)
library(foreach)
library(doParallel)
library(igraph)
library(tidygraph)
library(furrr)
library(rlist)
library(ggrepel)
library(ggeffects)
library(factoextra)
library(FactoMineR)
library(lwgeom)
library(bipartite)
library(tsibble)
library(lubridate)
library(hrbrthemes)
library(sjPlot)
library(forcats)## 문자열 깨질 때 로케일 설정 - 윈도우
#Sys.getlocale()
#localeToCharset()
#
## 미국 로케일로 로케일을 변환하기
#Sys.setlocale(category = 'LC_ALL',locale = 'english')
#localeToCharset()
#
## 우리나라로 로케일 변경하기
#Sys.setlocale(category = 'LC_ALL',locale='korean')
#localeToCharset()
1 consistent administration codes
1.1 kosis code
kosis_code <- readxl::read_excel("adm_codes/통계청_행정동코드_2021년기준.xlsx")
colnames(kosis_code) <- c("adm_cd", "adm_nm")
kosis_code:) # A tibble: 3,792 × 2
:) adm_cd adm_nm
:) <chr> <chr>
:) 1 <NA> <NA>
:) 2 행정구역코드 행정구역명
:) 3 11 서울특별시
:) 4 21 부산광역시
:) 5 22 대구광역시
:) 6 23 인천광역시
:) 7 24 광주광역시
:) 8 25 대전광역시
:) 9 26 울산광역시
:) 10 29 세종특별자치시
:) # ℹ 3,782 more rows
sujung <- tribble(
~sgg_cd, ~sgg_nm,
"31021", "수정구"
)
kosis_sgg <- kosis_code %>%
rename(sgg_cd = adm_cd) %>%
rename(sgg_nm = adm_nm) %>%
filter(!sgg_nm %in% c("수원시", "성남시", "안양시", "안산시", "고양시", "용인시")) %>%
filter(str_length(sgg_cd) == 5) %>%
filter(str_sub(sgg_cd, 1, 2) %in% c("11", "23", "31")) %>%
filter(sgg_nm != "옹진군" | str_sub(sgg_cd, 1, 5) != "23320") %>%
bind_rows(sujung) %>%
arrange(sgg_cd)
kosis_sgg:) # A tibble: 76 × 2
:) sgg_cd sgg_nm
:) <chr> <chr>
:) 1 11010 종로구
:) 2 11020 중구
:) 3 11030 용산구
:) 4 11040 성동구
:) 5 11050 광진구
:) 6 11060 동대문구
:) 7 11070 중랑구
:) 8 11080 성북구
:) 9 11090 강북구
:) 10 11100 도봉구
:) # ℹ 66 more rows
kosis_code <- kosis_code %>%
filter(as.numeric(adm_cd) > 0 & str_length(adm_cd) > 5) %>%
filter(str_sub(adm_cd, 1, 2) %in% c("11", "23", "31")) %>%
filter(str_sub(adm_cd, 1, 5) != "23320") %>%
mutate(sgg_cd = str_sub(adm_cd, 1, 5)) %>%
left_join(kosis_sgg, by = "sgg_cd") %>%
select(-sgg_cd) %>%
mutate(adm_nm = str_c(sgg_nm, "_", adm_nm)) %>%
select(adm_cd, adm_nm)
kosis_code:) # A tibble: 1,130 × 2
:) adm_cd adm_nm
:) <chr> <chr>
:) 1 1101053 종로구_사직동
:) 2 1101054 종로구_삼청동
:) 3 1101055 종로구_부암동
:) 4 1101056 종로구_평창동
:) 5 1101057 종로구_무악동
:) 6 1101058 종로구_교남동
:) 7 1101060 종로구_가회동
:) 8 1101061 종로구_종로1.2.3.4가동
:) 9 1101063 종로구_종로5.6가동
:) 10 1101064 종로구_이화동
:) # ℹ 1,120 more rows
:) adm_cd adm_nm
:) 0 0
1.2 shp code
# KOSIS에는 항동이 있지만, 생활이동데이터에는 항동이 없음
dong.sf <- sf::st_read("data_shp/shp_edited_sma_contiguous/shp_edited_sma_contiguous.shp", options = "ENCODING=euc-kr") %>%
select(-BASE_DATE):) options: ENCODING=euc-kr
:) Reading layer `shp_edited_sma_contiguous' from data source `C:\Users\lejae\OneDrive\바탕 화면\thesis_최종\thesis_analysis_분석\data_shp\shp_edited_sma_contiguous\shp_edited_sma_contiguous.shp' using driver `ESRI Shapefile'
:) Simple feature collection with 1124 features and 3 fields
:) Geometry type: MULTIPOLYGON
:) Dimension: XY
:) Bounding box: xmin: 865000 ymin: 1880000 xmax: 1030000 ymax: 2030000
:) Projected CRS: Korea 2000 / Unified CS
colnames(dong.sf) <- c("adm_cd", "adm_nm", "geometry")
dong.sf <- dong.sf %>%
filter(adm_nm != "옹진군" | str_sub(adm_cd, 1, 5) != "23320") %>% # 23320이 옹진군임
filter(as.numeric(adm_cd) > 0 & str_length(adm_cd) > 5) %>%
filter(str_sub(adm_cd, 1, 2) %in% c("11", "23", "31")) %>%
mutate(sgg_cd = str_sub(adm_cd, 1, 5)) %>%
left_join(kosis_sgg, by = "sgg_cd") %>%
select(-sgg_cd) %>%
mutate(adm_nm = str_c(sgg_nm, "_", adm_nm)) %>%
mutate(adm_nm = stringr::str_replace_all(adm_nm, "·", ".")) %>%
mutate(adm_cd = replace(adm_cd, adm_nm == "구로구_오류2동", "1117068"), # 생활이동데이터에 기반한 멤버십에 항동이 따로 없으므로 이 단계에서는 필수적으로 이렇게 해야 함.
adm_cd = replace(adm_cd, adm_nm == "구로구_항동", "1117068"),
adm_nm = replace(adm_nm, adm_nm == "구로구_항동", "구로구_오류2동")) %>%
st_snap_to_grid(size = 0.08) %>%
st_make_valid() %>%
group_by(adm_nm, adm_cd) %>%
summarise(geometry = st_union(geometry)) %>%
ungroup() %>%
arrange(adm_cd)
dong.sf:) Simple feature collection with 1123 features and 2 fields
:) Geometry type: GEOMETRY
:) Dimension: XY
:) Bounding box: xmin: 865000 ymin: 1880000 xmax: 1030000 ymax: 2030000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 1,123 × 3
:) adm_nm adm_cd geometry
:) <chr> <chr> <MULTIPOLYGON [m]>
:) 1 종로구_사직동 1101053 (((953554 1953336, 953555 1953320, 953556 1953307, 953557 1953295, 953558 1953281, 9...
:) 2 종로구_삼청동 1101054 (((953844 1955492, 953859 1955490, 953902 1955493, 953912 1955493, 953916 1955492, 9...
:) 3 종로구_부암동 1101055 (((952490 1956549, 952498 1956533, 952501 1956525, 952501 1956524, 952492 1956515, 9...
:) 4 종로구_평창동 1101056 (((953684 1959210, 953665 1959132, 953647 1959057, 953651 1959043, 953672 1958971, 9...
:) 5 종로구_무악동 1101057 (((952298 1953540, 952325 1953508, 952329 1953500, 952338 1953484, 952339 1953482, 9...
:) 6 종로구_교남동 1101058 (((952572 1953259, 952573 1953256, 952575 1953250, 952577 1953241, 952580 1953234, 9...
:) 7 종로구_가회동 1101060 (((954895 1954615, 954888 1954592, 954865 1954592, 954856 1954592, 954838 1954563, 9...
:) 8 종로구_종로1.2.3.4가동 1101061 (((954918 1954372, 954926 1954362, 954932 1954355, 954937 1954352, 954949 1954346, 9...
:) 9 종로구_종로5.6가동 1101063 (((956607 1953150, 956607 1953148, 956607 1953146, 956607 1953144, 956607 1953139, 9...
:) 10 종로구_이화동 1101064 (((956366 1954112, 956372 1954108, 956379 1954108, 956379 1954108, 956408 1954108, 9...
:) # ℹ 1,113 more rows
:) adm_nm adm_cd geometry
:) 0 0 0
2 functional districts
2.1 binding
Refer to the .csv file below
binding/cumu_tibble_yesscaling_modularity_CLK_2020_2021_01_time_111213_0720_1100_SemiModifiedExpectation.csv
2020년과 2021년 각각의 1월 평일 11:00 ~ 13:00 동안의 모든 통행 유형을
포함하는 통행 네트워크를 구축함. 위 네트워크를 바탕으로 합역적 바인딩을
수행함.
2.2 import membership information
# 생활이동데이터에는 항동이 없음
membership_info_eng <- read_csv('data_membership/membership_info_eng_2020_2021_01_time_111213_0725_1313_SemiModifiedExpectation.csv', show_col_types = FALSE) %>%
mutate(ADM_CD = as.character(ADM_CD)) %>%
rename(name = member_eng) %>%
select(-ADM_NM) %>%
rename(adm_cd = ADM_CD) %>%
mutate(name = ifelse(name == "Myeong", "Myeongdong", name))
membership_info_eng:) # A tibble: 424 × 3
:) adm_cd member name
:) <chr> <chr> <chr>
:) 1 1101053 left_49_47_further Sajik
:) 2 1101054 right_49_47 Jongno
:) 3 1101055 left_49_47 Seongbuk
:) 4 1101056 left_49_47 Seongbuk
:) 5 1101057 left_49_47_further Sajik
:) 6 1101058 left_49_47_further Sajik
:) 7 1101060 right_49_47 Jongno
:) 8 1101061 right_49_47 Jongno
:) 9 1101063 left_14_9 Changshin
:) 10 1101064 right_49_47 Jongno
:) # ℹ 414 more rows
2.3 spatial aggregation
dong.sf_commune <- dong.sf %>%
filter(str_sub(adm_cd, 1, 2) == "11") %>%
st_snap_to_grid(size = 0.02) %>%
st_make_valid() %>%
left_join(membership_info_eng,
by = "adm_cd") %>%
group_by(name) %>%
summarise(geometry = st_union(geometry)) %>%
mutate(name = factor(name)) %>%
mutate(area = sf::st_area(geometry)) %>%
mutate(name = as.character(name))
dong.sf_commune:) Simple feature collection with 54 features and 2 fields
:) Geometry type: GEOMETRY
:) Dimension: XY
:) Bounding box: xmin: 935000 ymin: 1940000 xmax: 972000 ymax: 1970000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 54 × 3
:) name geometry area
:) * <chr> <POLYGON [m]> [m^2]
:) 1 Anam ((957987 1953401, 957976 1953392, 957958 1953376, 957944 1953364, 957936 1953357, 957909 ... 15774236.
:) 2 Balsan ((941113 1947544, 941103 1947542, 941103 1947542, 941062 1947534, 941061 1947533, 941057 ... 8854094.
:) 3 Bangbae ((954771 1940006, 954755 1939998, 954750 1939996, 954733 1939987, 954711 1939975, 954653 ... 15289435.
:) 4 Banghak ((959860 1959717, 959857 1959710, 959854 1959705, 959858 1959697, 959852 1959692, 959853 ... 19514974.
:) 5 Bangi ((968192 1945526, 968204 1945515, 968204 1945515, 968210 1945508, 968211 1945506, 968211 ... 10361966.
:) 6 Banpo ((956728 1945168, 956728 1945168, 956729 1945167, 956729 1945166, 956730 1945165, 956730 ... 6799150.
:) 7 Changshin ((956717 1952379, 956714 1952379, 956710 1952379, 956708 1952379, 956703 1952379, 956701 ... 1972479.
:) 8 Cheongdam ((960137 1945124, 960129 1945145, 960128 1945146, 960111 1945193, 960109 1945199, 960100 ... 7369461.
:) 9 Chunghyeon ((953195 1951524, 953196 1951519, 953197 1951516, 953199 1951516, 953206 1951503, 953207 ... 2057803.
:) 10 Daechi ((961579 1941234, 961557 1941223, 961545 1941217, 961524 1941214, 961492 1941208, 961476 ... 10038396.
:) # ℹ 44 more rows
:) name geometry area
:) 0 0 0
2.4 mapping: figure 1a
2.4.1 SMA
sido.sf <- dong.sf %>%
st_snap_to_grid(size = 0.08) %>%
st_make_valid() %>%
mutate(sido_cd = str_sub(adm_cd, 1, 2)) %>%
group_by(sido_cd) %>%
summarise(geometry = st_union(geometry)) %>%
mutate(sido_cd = factor(sido_cd)) %>%
mutate(area = sf::st_area(geometry)) %>%
mutate(sido_cd = as.character(sido_cd)) %>%
mutate(sido_nm = sido_cd) %>%
mutate(sido_nm = case_when(as.character(sido_cd) == "11" ~ "Seoul",
as.character(sido_cd) == "23" ~ "Incheon",
as.character(sido_cd) == "31" ~ "Kyeonggi"))
sido.sf:) Simple feature collection with 3 features and 3 fields
:) Geometry type: GEOMETRY
:) Dimension: XY
:) Bounding box: xmin: 865000 ymin: 1880000 xmax: 1030000 ymax: 2030000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 3 × 4
:) sido_cd geometry area sido_nm
:) * <chr> <GEOMETRY [m]> [m^2] <chr>
:) 1 11 POLYGON ((949615 1938876, 949613 1938885, 949609 1938899, 949605 1938918, 949604 1938923, 949598 ... 605300078. Seoul
:) 2 23 MULTIPOLYGON (((927779 1932133, 927771 1932132, 927770 1932133, 927770 1932133, 927766 1932141, 9... 918081359. Incheon
:) 3 31 MULTIPOLYGON (((931105 1890395, 931117 1890393, 931119 1890392, 931125 1890391, 931129 1890390, 9... 10297762326. Kyeonggi
sido.sf_centroid <- sido.sf %>%
st_centroid()
sido.sf_centroid %>%
st_geometry() %>%
as.data.frame() %>%
mutate(geometry = as.character(geometry)):) geometry
:) 1 c(955110.28842015, 1950405.41966252)
:) 2 c(913867.034798075, 1954864.44894772)
:) 3 c(971411.321311472, 1948334.49203402)
st_geometry(sido.sf_centroid)[[2]] <- st_point(c(923867.034798075, 1950864.44894772))
st_geometry(sido.sf_centroid)[[3]] <- st_point(c(991411.321311472, 1937334.49203402))seoul.sf <- dong.sf %>%
st_snap_to_grid(size = 0.08) %>%
st_make_valid() %>%
filter(str_sub(adm_cd, 1, 2) == "11") %>%
summarise(geometry = st_union(geometry))
seoul.sf:) Simple feature collection with 1 feature and 0 fields
:) Geometry type: POLYGON
:) Dimension: XY
:) Bounding box: xmin: 935000 ymin: 1940000 xmax: 972000 ymax: 1970000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 1 × 1
:) geometry
:) <POLYGON [m]>
:) 1 ((949615 1938876, 949613 1938885, 949609 1938899, 949605 1938918, 949604 1938923, 949598 ...
2.4.2 korea
korea.sf <- sf::st_read("data_shp/shp_edited_korea/bnd_sigungu_00_2020_2020_4Q.shp", options = "ENCODING=euc-kr") %>%
select(-BASE_DATE):) options: ENCODING=euc-kr
:) Reading layer `bnd_sigungu_00_2020_2020_4Q' from data source `C:\Users\lejae\OneDrive\바탕 화면\thesis_최종\thesis_analysis_분석\data_shp\shp_edited_korea\bnd_sigungu_00_2020_2020_4Q.shp' using driver `ESRI Shapefile'
:) Simple feature collection with 250 features and 3 fields
:) Geometry type: MULTIPOLYGON
:) Dimension: XY
:) Bounding box: xmin: 746000 ymin: 1460000 xmax: 1390000 ymax: 2070000
:) Projected CRS: Korea 2000 / Unified CS
:) Simple feature collection with 250 features and 2 fields
:) Geometry type: MULTIPOLYGON
:) Dimension: XY
:) Bounding box: xmin: 746000 ymin: 1460000 xmax: 1390000 ymax: 2070000
:) Projected CRS: Korea 2000 / Unified CS
:) First 10 features:
:) SIGUNGU_CD SIGUNGU_NM geometry
:) 1 11010 종로구 MULTIPOLYGON (((953684 1959...
:) 2 11020 중구 MULTIPOLYGON (((957890 1952...
:) 3 11030 용산구 MULTIPOLYGON (((953114 1950...
:) 4 11040 성동구 MULTIPOLYGON (((959382 1952...
:) 5 11050 광진구 MULTIPOLYGON (((964825 1952...
:) 6 11060 동대문구 MULTIPOLYGON (((961992 1956...
:) 7 11070 중랑구 MULTIPOLYGON (((965699 1957...
:) 8 11080 성북구 MULTIPOLYGON (((954470 1959...
:) 9 11090 강북구 MULTIPOLYGON (((956319 1965...
:) 10 11100 도봉구 MULTIPOLYGON (((957671 1966...
3 background EDAs
3.1 edu level by industry and occupation
3.1.1 import regional_mdis_1
regional_mdis_1 <- read_csv("data_mdis_econsurvey/regional_mdis/2021_하반기_A형_시군구_대분류_2021.csv",
locale=locale('ko',encoding='euc-kr'))
regional_mdis_1 %>%
is.na() %>%
colSums():) 만연령 교육정도코드 사업체소재지행정구역코드 10차_산업분류코드 7차_직업분류코드 종사상지위코드 행정구역코드 경제활동구분코드
:) 0 0 175354 175354 175354 175354 0 0
colnames(regional_mdis_1) <- c("age", "edu", "workplace_cd", "ind", "occup", "hierar", "resid_cd", "empstatus")
regional_mdis_1:) # A tibble: 431,235 × 8
:) age edu workplace_cd ind occup hierar resid_cd empstatus
:) <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
:) 1 100 0 NA <NA> NA NA 1104 3
:) 2 100 0 NA <NA> NA NA 1109 3
:) 3 100 0 NA <NA> NA NA 1111 3
:) 4 100 0 NA <NA> NA NA 1111 3
:) 5 100 0 NA <NA> NA NA 2502 3
:) 6 100 0 NA <NA> NA NA 3121 3
:) 7 100 0 NA <NA> NA NA 3204 3
:) 8 100 0 NA <NA> NA NA 3232 3
:) 9 100 0 NA <NA> NA NA 3304 3
:) 10 100 0 NA <NA> NA NA 3506 3
:) # ℹ 431,225 more rows
regional_mdis_1 <- regional_mdis_1 %>%
mutate_if(is.numeric, as.character) %>%
filter(!is.na(workplace_cd)) %>%
filter(as.character(empstatus) == "1") %>% # 취업자만
select(-empstatus) %>%
select(-hierar) %>%
filter(age > 19) %>%
filter(age < 61) %>%
select(-age) %>%
relocate(resid_cd, workplace_cd, edu, occup, ind)
regional_mdis_1:) # A tibble: 169,365 × 5
:) resid_cd workplace_cd edu occup ind
:) <chr> <chr> <chr> <chr> <chr>
:) 1 3102 3102 2 9 N
:) 2 1106 1101 3 4 I
:) 3 1115 1102 3 3 N
:) 4 1103 1103 3 4 I
:) 5 1104 1104 3 9 H
:) 6 1106 1106 3 7 P
:) 7 1109 1109 3 3 G
:) 8 1110 1110 3 5 G
:) 9 1109 1111 3 8 C
:) 10 1110 1111 3 4 I
:) # ℹ 169,355 more rows
regional_mdis_1 <- regional_mdis_1 %>%
filter(str_sub(resid_cd, 1, 2) %in% c("11", "23", "31")) %>%
filter(str_sub(workplace_cd, 1, 2) == "11")
regional_mdis_1:) # A tibble: 23,653 × 5
:) resid_cd workplace_cd edu occup ind
:) <chr> <chr> <chr> <chr> <chr>
:) 1 1106 1101 3 4 I
:) 2 1115 1102 3 3 N
:) 3 1103 1103 3 4 I
:) 4 1104 1104 3 9 H
:) 5 1106 1106 3 7 P
:) 6 1109 1109 3 3 G
:) 7 1110 1110 3 5 G
:) 8 1109 1111 3 8 C
:) 9 1110 1111 3 4 I
:) 10 1111 1111 3 4 S
:) # ℹ 23,643 more rows
#1101 서울종로구
#1102 서울중구
#1103 서울용산구
#1104 서울성동구
#1105 서울광진구
#1106 서울동대문구
#1107 서울중랑구
#1108 서울성북구
#1109 서울강북구
#1110 서울도봉구
#1111 서울노원구
#1112 서울은평구
#1113 서울서대문구
#1114 서울마포구
#1115 서울양천구
#1116 서울강서구
#1117 서울구로구
#1118 서울금천구
#1119 서울영등포구
#1120 서울동작구
#1121 서울관악구
#1122 서울서초구
#1123 서울강남구
#1124 서울송파구
#1125 서울강동구regional_mdis_1 <- regional_mdis_1 %>%
mutate(ind = recode(ind,
'A' = '농어업',
'B' = '광업',
'C' = '제조업',
'D' = '전기가스업',
'E' = '수도하수업',
'F' = '건설업',
'G' = '도소매업',
'H' = '운수창고업',
'I' = '숙박음식업',
'J' = '정보통신업',
'K' = '금융보험업',
'L' = '부동산업',
'M' = '전문과학기술업',
'N' = '사업지원업',
'O' = '공공행정',
'P' = '교육',
'Q' = '보건복지',
'R' = '예술스포츠여가',
'S' = '협회및개인서비스',
'T' = 'nu1',
'U' = 'nu2')) %>%
filter(!ind %in% c("nu1", "nu2")) %>%
filter(!ind %in% c("건설업", "농어업", "광업", "수도하수업", "전기가스업")) %>%
#mutate(occup = factor(occup)) %>%
mutate(occup = recode(occup,
'1' = '관리직',
'2' = '전문직',
'3' = '사무직',
'4' = '서비스직',
'5' = '판매직',
'6' = '농어업숙련직',
'7' = '기능직',
'8' = '기계조작직',
'9' = '단순노무직')) %>%
filter(ind != '농어업숙련직') %>%
mutate(edu = as.character(edu)) %>%
mutate(edu = recode(edu,
'0' = '무학',
'1' = '초졸이하',
'2' = '중졸',
'3' = '고졸',
'4' = '초대졸',
'5' = '대졸',
'6' = '대학원석사',
'7' = '대학원박사')) %>%
mutate(edu_college = ifelse(edu %in% c('무학', '초졸이하', '중졸', '고졸', '초대졸'), '대졸미만', '대졸이상'))
regional_mdis_1:) # A tibble: 22,156 × 6
:) resid_cd workplace_cd edu occup ind edu_college
:) <chr> <chr> <chr> <chr> <chr> <chr>
:) 1 1106 1101 고졸 서비스직 숙박음식업 대졸미만
:) 2 1115 1102 고졸 사무직 사업지원업 대졸미만
:) 3 1103 1103 고졸 서비스직 숙박음식업 대졸미만
:) 4 1104 1104 고졸 단순노무직 운수창고업 대졸미만
:) 5 1106 1106 고졸 기능직 교육 대졸미만
:) 6 1109 1109 고졸 사무직 도소매업 대졸미만
:) 7 1110 1110 고졸 판매직 도소매업 대졸미만
:) 8 1109 1111 고졸 기계조작직 제조업 대졸미만
:) 9 1110 1111 고졸 서비스직 숙박음식업 대졸미만
:) 10 1111 1111 고졸 서비스직 협회및개인서비스 대졸미만
:) # ℹ 22,146 more rows
3.1.2 import regional_mdis_2
regional_mdis_2 <- read_csv("data_mdis_econsurvey/regional_mdis/2021_상반기_A형_시군구_대분류_2021.csv",
locale=locale('ko',encoding='euc-kr'))
regional_mdis_2:) # A tibble: 435,770 × 8
:) 행정구역시군구코드 만연령 교육정도코드 `10차산업대분류코드` 사업체소재지행정구역코드 `7차_1자리직업대분류코드` 종사상지위코드 경제활동구분코드
:) <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
:) 1 1101 15 2 <NA> NA NA NA 3
:) 2 1101 15 2 <NA> NA NA NA 3
:) 3 1101 15 2 <NA> NA NA NA 3
:) 4 1101 15 2 <NA> NA NA NA 3
:) 5 1101 15 3 <NA> NA NA NA 3
:) 6 1101 15 3 <NA> NA NA NA 3
:) 7 1101 15 3 <NA> NA NA NA 3
:) 8 1101 15 3 <NA> NA NA NA 3
:) 9 1101 15 3 <NA> NA NA NA 3
:) 10 1101 16 2 <NA> NA NA NA 3
:) # ℹ 435,760 more rows
:) 행정구역시군구코드 만연령 교육정도코드 10차산업대분류코드 사업체소재지행정구역코드 7차_1자리직업대분류코드 종사상지위코드 경제활동구분코드
:) 0 0 0 180846 180846 180846 180846 0
colnames(regional_mdis_2) <- c("resid_cd", "age", "edu", "ind", "workplace_cd", "occup", "hierar", "empstatus")
regional_mdis_2:) # A tibble: 435,770 × 8
:) resid_cd age edu ind workplace_cd occup hierar empstatus
:) <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
:) 1 1101 15 2 <NA> NA NA NA 3
:) 2 1101 15 2 <NA> NA NA NA 3
:) 3 1101 15 2 <NA> NA NA NA 3
:) 4 1101 15 2 <NA> NA NA NA 3
:) 5 1101 15 3 <NA> NA NA NA 3
:) 6 1101 15 3 <NA> NA NA NA 3
:) 7 1101 15 3 <NA> NA NA NA 3
:) 8 1101 15 3 <NA> NA NA NA 3
:) 9 1101 15 3 <NA> NA NA NA 3
:) 10 1101 16 2 <NA> NA NA NA 3
:) # ℹ 435,760 more rows
regional_mdis_2 <- regional_mdis_2 %>%
mutate_if(is.numeric, as.character) %>%
filter(!is.na(workplace_cd)) %>%
filter(as.character(empstatus) == "1") %>% # 취업자만
select(-empstatus) %>%
select(-hierar) %>%
filter(age > 19) %>%
filter(age < 61) %>%
select(-age) %>%
relocate(resid_cd, workplace_cd, edu, occup, ind)
regional_mdis_2:) # A tibble: 171,773 × 5
:) resid_cd workplace_cd edu occup ind
:) <chr> <chr> <chr> <chr> <chr>
:) 1 1101 1101 3 4 I
:) 2 1101 1104 3 7 I
:) 3 1101 2602 4 5 G
:) 4 1101 1101 4 3 R
:) 5 1101 1103 5 4 I
:) 6 1101 1123 3 4 M
:) 7 1101 1101 5 4 I
:) 8 1101 1106 5 3 K
:) 9 1101 1111 5 2 P
:) 10 1101 1101 5 4 R
:) # ℹ 171,763 more rows
regional_mdis_2 <- regional_mdis_2 %>%
filter(str_sub(resid_cd, 1, 2) %in% c("11", "23", "31")) %>%
filter(str_sub(workplace_cd, 1, 2) == "11")
regional_mdis_2:) # A tibble: 23,975 × 5
:) resid_cd workplace_cd edu occup ind
:) <chr> <chr> <chr> <chr> <chr>
:) 1 1101 1101 3 4 I
:) 2 1101 1104 3 7 I
:) 3 1101 1101 4 3 R
:) 4 1101 1103 5 4 I
:) 5 1101 1123 3 4 M
:) 6 1101 1101 5 4 I
:) 7 1101 1106 5 3 K
:) 8 1101 1111 5 2 P
:) 9 1101 1101 5 4 R
:) 10 1101 1101 4 7 C
:) # ℹ 23,965 more rows
regional_mdis_2 <- regional_mdis_2 %>%
mutate(ind = recode(ind,
'A' = '농어업',
'B' = '광업',
'C' = '제조업',
'D' = '전기가스업',
'E' = '수도하수업',
'F' = '건설업',
'G' = '도소매업',
'H' = '운수창고업',
'I' = '숙박음식업',
'J' = '정보통신업',
'K' = '금융보험업',
'L' = '부동산업',
'M' = '전문과학기술업',
'N' = '사업지원업',
'O' = '공공행정',
'P' = '교육',
'Q' = '보건복지',
'R' = '예술스포츠여가',
'S' = '협회및개인서비스',
'T' = 'nu1',
'U' = 'nu2')) %>%
filter(!ind %in% c("nu1", "nu2")) %>%
filter(!ind %in% c("건설업", "농어업", "광업", "수도하수업", "전기가스업")) %>%
#mutate(occup = factor(occup)) %>%
mutate(occup = recode(occup,
'1' = '관리직',
'2' = '전문직',
'3' = '사무직',
'4' = '서비스직',
'5' = '판매직',
'6' = '농어업숙련직',
'7' = '기능직',
'8' = '기계조작직',
'9' = '단순노무직')) %>%
filter(ind != '농어업숙련직') %>%
mutate(edu = as.character(edu)) %>%
mutate(edu = recode(edu,
'0' = '무학',
'1' = '초졸이하',
'2' = '중졸',
'3' = '고졸',
'4' = '초대졸',
'5' = '대졸',
'6' = '대학원석사',
'7' = '대학원박사')) %>%
mutate(edu_college = ifelse(edu %in% c('무학', '초졸이하', '중졸', '고졸', '초대졸'), '대졸미만', '대졸이상'))
regional_mdis_2:) # A tibble: 22,427 × 6
:) resid_cd workplace_cd edu occup ind edu_college
:) <chr> <chr> <chr> <chr> <chr> <chr>
:) 1 1101 1101 고졸 서비스직 숙박음식업 대졸미만
:) 2 1101 1104 고졸 기능직 숙박음식업 대졸미만
:) 3 1101 1101 초대졸 사무직 예술스포츠여가 대졸미만
:) 4 1101 1103 대졸 서비스직 숙박음식업 대졸이상
:) 5 1101 1123 고졸 서비스직 전문과학기술업 대졸미만
:) 6 1101 1101 대졸 서비스직 숙박음식업 대졸이상
:) 7 1101 1106 대졸 사무직 금융보험업 대졸이상
:) 8 1101 1111 대졸 전문직 교육 대졸이상
:) 9 1101 1101 대졸 서비스직 예술스포츠여가 대졸이상
:) 10 1101 1101 초대졸 기능직 제조업 대졸미만
:) # ℹ 22,417 more rows
3.1.3 integrate
:) # A tibble: 44,583 × 6
:) resid_cd workplace_cd edu occup ind edu_college
:) <chr> <chr> <chr> <chr> <chr> <chr>
:) 1 1106 1101 고졸 서비스직 숙박음식업 대졸미만
:) 2 1115 1102 고졸 사무직 사업지원업 대졸미만
:) 3 1103 1103 고졸 서비스직 숙박음식업 대졸미만
:) 4 1104 1104 고졸 단순노무직 운수창고업 대졸미만
:) 5 1106 1106 고졸 기능직 교육 대졸미만
:) 6 1109 1109 고졸 사무직 도소매업 대졸미만
:) 7 1110 1110 고졸 판매직 도소매업 대졸미만
:) 8 1109 1111 고졸 기계조작직 제조업 대졸미만
:) 9 1110 1111 고졸 서비스직 숙박음식업 대졸미만
:) 10 1111 1111 고졸 서비스직 협회및개인서비스 대졸미만
:) # ℹ 44,573 more rows
regional_mdis <- regional_mdis %>%
group_by(occup, ind, edu_college) %>%
summarise(count = n())
regional_mdis:) # A tibble: 220 × 4
:) # Groups: occup, ind [113]
:) occup ind edu_college count
:) <chr> <chr> <chr> <int>
:) 1 관리직 공공행정 대졸이상 19
:) 2 관리직 교육 대졸미만 1
:) 3 관리직 교육 대졸이상 29
:) 4 관리직 금융보험업 대졸미만 7
:) 5 관리직 금융보험업 대졸이상 101
:) 6 관리직 도소매업 대졸미만 6
:) 7 관리직 도소매업 대졸이상 79
:) 8 관리직 보건복지 대졸이상 12
:) 9 관리직 부동산업 대졸미만 1
:) 10 관리직 부동산업 대졸이상 12
:) # ℹ 210 more rows
3.1.4 edu level by industry
edu_by_industry <- regional_mdis %>%
ungroup() %>%
group_by(ind, edu_college) %>%
summarise(count = sum(count))
edu_by_industry <- edu_by_industry %>%
pivot_wider(names_from = edu_college, values_from = count) %>%
ungroup()
colnames(edu_by_industry) <- c("ind", "below", "above")
edu_by_industry:) # A tibble: 14 × 3
:) ind below above
:) <chr> <int> <int>
:) 1 공공행정 466 1153
:) 2 교육 587 3255
:) 3 금융보험업 841 2104
:) 4 도소매업 3758 3910
:) 5 보건복지 1705 2116
:) 6 부동산업 439 530
:) 7 사업지원업 1384 958
:) 8 숙박음식업 2181 1194
:) 9 예술스포츠여가 324 648
:) 10 운수창고업 1200 857
:) 11 전문과학기술업 1017 4029
:) 12 정보통신업 1083 3702
:) 13 제조업 1869 1529
:) 14 협회및개인서비스 1006 738
edu_by_industry <- edu_by_industry %>%
mutate(college_and_above = above / (below + above)) %>%
select(ind, college_and_above)
edu_by_industry %>%
arrange(desc(college_and_above)):) # A tibble: 14 × 2
:) ind college_and_above
:) <chr> <dbl>
:) 1 교육 0.847
:) 2 전문과학기술업 0.798
:) 3 정보통신업 0.774
:) 4 금융보험업 0.714
:) 5 공공행정 0.712
:) 6 예술스포츠여가 0.667
:) 7 보건복지 0.554
:) 8 부동산업 0.547
:) 9 도소매업 0.510
:) 10 제조업 0.450
:) 11 협회및개인서비스 0.423
:) 12 운수창고업 0.417
:) 13 사업지원업 0.409
:) 14 숙박음식업 0.354
3.1.5 edu level by occupation
edu_by_occupation <- regional_mdis %>%
ungroup() %>%
group_by(occup, edu_college) %>%
summarise(count = sum(count))
edu_by_occupation <- edu_by_occupation %>%
pivot_wider(names_from = edu_college, values_from = count) %>%
ungroup()
colnames(edu_by_occupation) <- c("occup", "below", "above")
edu_by_occupation:) # A tibble: 9 × 3
:) occup below above
:) <chr> <int> <int>
:) 1 관리직 50 489
:) 2 기계조작직 1295 351
:) 3 기능직 1531 376
:) 4 농어업숙련직 7 6
:) 5 단순노무직 2147 572
:) 6 사무직 3759 8321
:) 7 서비스직 2918 1655
:) 8 전문직 3012 12710
:) 9 판매직 3141 2243
edu_by_occupation <- edu_by_occupation %>%
mutate(college_and_above = above / (below + above)) %>%
select(occup, college_and_above)
edu_by_occupation %>%
arrange(college_and_above):) # A tibble: 9 × 2
:) occup college_and_above
:) <chr> <dbl>
:) 1 기능직 0.197
:) 2 단순노무직 0.210
:) 3 기계조작직 0.213
:) 4 서비스직 0.362
:) 5 판매직 0.417
:) 6 농어업숙련직 0.462
:) 7 사무직 0.689
:) 8 전문직 0.808
:) 9 관리직 0.907
#관리자 Managers
#전문가 및 관련 종사자 Professionals and Related Workers
#사무 종사자 Clerks
#서비스 종사자 Service Workers
#판매 종사자 Sales Workers
#농림·어업 숙련 종사자 Skilled Agricultural, Forestry and Fishery Workers
#기능원 및 관련 기능 종사자 Craft and Related Trades Workers
#장치·기계 조작 및 조립 종사자 Equipment, Machine Operating and Assembling Workers
#단순노무 종사자 Elementary Workersedu_by_occupation <- edu_by_occupation %>%
mutate(occup = recode(occup,
'관리직' = 'Managerial',
'전문직' = 'Professional',
'사무직' = 'Clerical',
'서비스직' = 'Service',
'판매직' = 'Sales',
'농어업숙련직' = 'Agricultural\nand fishery',
'기능직' = 'Craft',
'기계조작직' = 'Machine\n operating',
'단순노무직' = 'Elemetrary'))
edu_by_occupation %>%
arrange(college_and_above) :) # A tibble: 9 × 2
:) occup college_and_above
:) <chr> <dbl>
:) 1 "Craft" 0.197
:) 2 "Elemetrary" 0.210
:) 3 "Machine\n operating" 0.213
:) 4 "Service" 0.362
:) 5 "Sales" 0.417
:) 6 "Agricultural\nand fishery" 0.462
:) 7 "Clerical" 0.689
:) 8 "Professional" 0.808
:) 9 "Managerial" 0.907
3.1.6 edu level by occupation (correlation)
:) # A tibble: 44,583 × 6
:) resid_cd workplace_cd edu occup ind edu_college
:) <chr> <chr> <chr> <chr> <chr> <chr>
:) 1 1106 1101 고졸 서비스직 숙박음식업 대졸미만
:) 2 1115 1102 고졸 사무직 사업지원업 대졸미만
:) 3 1103 1103 고졸 서비스직 숙박음식업 대졸미만
:) 4 1104 1104 고졸 단순노무직 운수창고업 대졸미만
:) 5 1106 1106 고졸 기능직 교육 대졸미만
:) 6 1109 1109 고졸 사무직 도소매업 대졸미만
:) 7 1110 1110 고졸 판매직 도소매업 대졸미만
:) 8 1109 1111 고졸 기계조작직 제조업 대졸미만
:) 9 1110 1111 고졸 서비스직 숙박음식업 대졸미만
:) 10 1111 1111 고졸 서비스직 협회및개인서비스 대졸미만
:) # ℹ 44,573 more rows
regional_mdis <- regional_mdis %>%
group_by(resid_cd, edu_college, occup, ind) %>%
summarise(count = n())
regional_mdis:) # A tibble: 5,430 × 5
:) # Groups: resid_cd, edu_college, occup [913]
:) resid_cd edu_college occup ind count
:) <chr> <chr> <chr> <chr> <int>
:) 1 1101 대졸미만 기계조작직 부동산업 1
:) 2 1101 대졸미만 기계조작직 숙박음식업 2
:) 3 1101 대졸미만 기계조작직 운수창고업 5
:) 4 1101 대졸미만 기계조작직 제조업 6
:) 5 1101 대졸미만 기능직 도소매업 3
:) 6 1101 대졸미만 기능직 사업지원업 3
:) 7 1101 대졸미만 기능직 숙박음식업 2
:) 8 1101 대졸미만 기능직 제조업 54
:) 9 1101 대졸미만 기능직 협회및개인서비스 5
:) 10 1101 대졸미만 단순노무직 공공행정 7
:) # ℹ 5,420 more rows
region_specific_edu <- regional_mdis %>%
ungroup() %>%
group_by(resid_cd, edu_college) %>%
summarise(count = n()) %>%
pivot_wider(names_from = edu_college, values_from = count) %>%
ungroup()
region_specific_edu:) # A tibble: 66 × 3
:) resid_cd 대졸미만 대졸이상
:) <chr> <int> <int>
:) 1 1101 52 61
:) 2 1102 53 52
:) 3 1103 58 64
:) 4 1104 56 55
:) 5 1105 62 63
:) 6 1106 63 59
:) 7 1107 68 57
:) 8 1108 65 59
:) 9 1109 62 58
:) 10 1110 60 56
:) # ℹ 56 more rows
:) # A tibble: 66 × 3
:) resid_cd below above
:) <chr> <int> <int>
:) 1 1101 52 61
:) 2 1102 53 52
:) 3 1103 58 64
:) 4 1104 56 55
:) 5 1105 62 63
:) 6 1106 63 59
:) 7 1107 68 57
:) 8 1108 65 59
:) 9 1109 62 58
:) 10 1110 60 56
:) # ℹ 56 more rows
region_specific_edu <- region_specific_edu %>%
mutate(college_and_above = above / (below + above)) %>%
select(resid_cd, college_and_above)
region_specific_edu:) # A tibble: 66 × 2
:) resid_cd college_and_above
:) <chr> <dbl>
:) 1 1101 0.540
:) 2 1102 0.495
:) 3 1103 0.525
:) 4 1104 0.495
:) 5 1105 0.504
:) 6 1106 0.484
:) 7 1107 0.456
:) 8 1108 0.476
:) 9 1109 0.483
:) 10 1110 0.483
:) # ℹ 56 more rows
region_specific_occup <- regional_mdis %>%
ungroup() %>%
group_by(resid_cd, occup) %>%
summarise(count = n()) %>%
mutate(proportion = count / sum(count)) %>%
select(-count)
region_specific_occup:) # A tibble: 496 × 3
:) # Groups: resid_cd [66]
:) resid_cd occup proportion
:) <chr> <chr> <dbl>
:) 1 1101 관리직 0.0354
:) 2 1101 기계조작직 0.0796
:) 3 1101 기능직 0.0796
:) 4 1101 단순노무직 0.133
:) 5 1101 사무직 0.239
:) 6 1101 서비스직 0.142
:) 7 1101 전문직 0.195
:) 8 1101 판매직 0.0973
:) 9 1102 관리직 0.0381
:) 10 1102 기계조작직 0.0857
:) # ℹ 486 more rows
3.1.6.1 visualize
:) # A tibble: 66 × 2
:) resid_cd college_and_above
:) <chr> <dbl>
:) 1 1101 0.540
:) 2 1102 0.495
:) 3 1103 0.525
:) 4 1104 0.495
:) 5 1105 0.504
:) 6 1106 0.484
:) 7 1107 0.456
:) 8 1108 0.476
:) 9 1109 0.483
:) 10 1110 0.483
:) # ℹ 56 more rows
:) # A tibble: 496 × 3
:) # Groups: resid_cd [66]
:) resid_cd occup proportion
:) <chr> <chr> <dbl>
:) 1 1101 관리직 0.0354
:) 2 1101 기계조작직 0.0796
:) 3 1101 기능직 0.0796
:) 4 1101 단순노무직 0.133
:) 5 1101 사무직 0.239
:) 6 1101 서비스직 0.142
:) 7 1101 전문직 0.195
:) 8 1101 판매직 0.0973
:) 9 1102 관리직 0.0381
:) 10 1102 기계조작직 0.0857
:) # ℹ 486 more rows
region_specific <- region_specific_occup %>%
left_join(region_specific_edu, by = "resid_cd")
region_specific:) # A tibble: 496 × 4
:) # Groups: resid_cd [66]
:) resid_cd occup proportion college_and_above
:) <chr> <chr> <dbl> <dbl>
:) 1 1101 관리직 0.0354 0.540
:) 2 1101 기계조작직 0.0796 0.540
:) 3 1101 기능직 0.0796 0.540
:) 4 1101 단순노무직 0.133 0.540
:) 5 1101 사무직 0.239 0.540
:) 6 1101 서비스직 0.142 0.540
:) 7 1101 전문직 0.195 0.540
:) 8 1101 판매직 0.0973 0.540
:) 9 1102 관리직 0.0381 0.495
:) 10 1102 기계조작직 0.0857 0.495
:) # ℹ 486 more rows
3.2 WFH during COVID by industry and edu level
3.2.1 2019
wfh_status_19 <- read_csv("data_mdis_econsurvey/8월_근로형태별_20221016_99633_데이터/2019_8월_근로형태별_20221016_99633.csv", locale=locale('ko',encoding='euc-kr'))
colnames(wfh_status_19):) [1] "동부읍면부코드" "교육정도_학력코드" "교육정도_계열코드" "현재일관련사항_10차산업대분류코드" "현재일관련사항_종사자규모코드" "현재일관련사항_7차직업대분류코드" "현재일관련사항_종사상지위코드" "교육정도컨버젼코드" "만연령" "근로장소코드" "유연근무제_활용여부" "유연근무제_활용형태1코드" "유연근무제_활용형태2코드"
:) [14] "유연근무제_향후활용예정형태1코드" "유연근무제_향후활용예정형태2코드" "연령계층코드" "가중값" "조사연월" "가구구분코드"
:) # A tibble: 60,159 × 7
:) 동부읍면부코드 현재일관련사항_10차산업대분류코드 현재일관련사항_7차직업대분류코드 교육정도컨버젼코드 만연령 유연근무제_활용여부 유연근무제_활용형태1코드
:) <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
:) 1 1 0 0 1 030 0 0
:) 2 1 0 0 1 033 0 0
:) 3 1 0 0 1 036 0 0
:) 4 1 0 0 1 038 0 0
:) 5 1 0 0 1 048 0 0
:) 6 1 0 0 1 049 0 0
:) 7 1 0 0 1 050 0 0
:) 8 1 0 0 1 051 0 0
:) 9 1 0 0 1 052 0 0
:) 10 1 0 0 1 052 0 0
:) # ℹ 60,149 more rows
colnames(wfh_status_19) <- c("dong_ep_myeon", "ind", "occup", "edu", "age", "flex_work_yesno", "flex_work_type")
wfh_status_19 <- wfh_status_19 %>%
mutate(ind = as.character(ind),
occup = as.character(occup),
edu = as.character(edu)) %>%
filter(ind != '0') %>%
filter(occup != '0') %>%
filter(edu != '0') %>%
filter(dong_ep_myeon == '1') %>%
mutate(age = as.numeric(age)) %>%
filter(age >= 20 & age < 61)
wfh_status_19 <- wfh_status_19 %>%
mutate(ind = recode(ind,
'A' = '농어업',
'B' = '광업',
'C' = '제조업',
'D' = '전기가스업',
'E' = '수도하수업',
'F' = '건설업',
'G' = '도소매업',
'H' = '운수창고업',
'I' = '숙박음식업',
'J' = '정보통신업',
'K' = '금융보험업',
'L' = '부동산업',
'M' = '전문과학기술업',
'N' = '사업지원업',
'O' = '공공행정',
'P' = '교육',
'Q' = '보건복지',
'R' = '예술스포츠여가',
'S' = '협회및개인서비스',
'T' = 'nu1',
'U' = 'nu2')) %>%
filter(!ind %in% c("nu1", "nu2")) %>%
filter(!ind %in% c("농어업", "광업", "수도하수업", "전기가스업")) %>%
mutate(occup = as.character(occup)) %>%
mutate(occup = recode(occup,
'1' = '관리직',
'2' = '전문직',
'3' = '사무직',
'4' = '서비스직',
'5' = '판매직',
'6' = '농어업숙련직',
'7' = '기능직',
'8' = '기계조작직',
'9' = '단순노무직')) %>%
mutate(edu = as.character(edu)) %>%
mutate(edu = recode(edu,
'1' = '초졸이하',
'2' = '중졸',
'3' = '고졸',
'4' = '초대졸',
'5' = '대졸',
'6' = '대학원졸')) %>%
mutate(edu_college = ifelse(edu %in% c('초졸이하', '중졸', '고졸', '초대졸'), '대졸미만', '대졸이상'))
wfh_status_19 <- wfh_status_19 %>%
mutate(flex_work_yesno = as.character(flex_work_yesno)) %>%
mutate(flex_work_yesno = recode(flex_work_yesno,
'0' = '없음',
'1' = '예',
'2' = '아니오')) %>%
mutate(flex_work_type = recode(flex_work_type,
'0' = '없음',
'1' = '근로시간단축근무제',
'2' = '시차출퇴근제',
'3' = '선택적근무시간제',
'4' = '재택및원격근무제',
'5' = '탄력적근무제',
'6' = '기타유형')) %>%
mutate(flex_work_type_isWFH = if_else(flex_work_type == '재택및원격근무제', 1, 0)) %>%
group_by(ind, edu_college) %>%
summarise(count = n(),
WFH = sum(flex_work_type_isWFH)) %>%
mutate(WFH_ratio_19 = WFH / count * 100)
wfh_status_19:) # A tibble: 30 × 5
:) # Groups: ind [15]
:) ind edu_college count WFH WFH_ratio_19
:) <chr> <chr> <int> <dbl> <dbl>
:) 1 건설업 대졸미만 1271 0 0
:) 2 건설업 대졸이상 435 0 0
:) 3 공공행정 대졸미만 297 0 0
:) 4 공공행정 대졸이상 659 2 0.303
:) 5 교육 대졸미만 406 1 0.246
:) 6 교육 대졸이상 1443 5 0.347
:) 7 금융보험업 대졸미만 333 1 0.300
:) 8 금융보험업 대졸이상 438 3 0.685
:) 9 도소매업 대졸미만 2040 4 0.196
:) 10 도소매업 대졸이상 979 5 0.511
:) # ℹ 20 more rows
3.2.2 2020
wfh_status_20 <- read_csv("data_mdis_econsurvey/8월_근로형태별_20221016_99633_데이터/2020_8월_근로형태별_20221016_99633.csv", locale=locale('ko',encoding='euc-kr'))
colnames(wfh_status_20):) [1] "동부읍면부코드" "교육정도_학력코드" "교육정도_계열코드" "현재일관련사항_10차산업대분류코드" "현재일관련사항_종사자규모코드" "현재일관련사항_7차직업대분류코드" "현재일관련사항_종사상지위코드" "교육정도컨버젼코드" "만연령" "근로장소코드" "유연근무제_활용여부" "유연근무제_활용형태1코드" "유연근무제_활용형태2코드"
:) [14] "유연근무제_향후활용예정형태1코드" "유연근무제_향후활용예정형태2코드" "연령계층코드" "가중값" "조사연월" "가구구분코드"
:) # A tibble: 59,546 × 7
:) 동부읍면부코드 현재일관련사항_10차산업대분류코드 현재일관련사항_7차직업대분류코드 교육정도컨버젼코드 만연령 유연근무제_활용여부 유연근무제_활용형태1코드
:) <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
:) 1 1 0 0 1 028 0 0
:) 2 1 0 0 1 031 0 0
:) 3 1 0 0 1 038 0 0
:) 4 1 0 0 1 039 0 0
:) 5 1 0 0 1 048 0 0
:) 6 1 0 0 1 049 0 0
:) 7 1 0 0 1 049 0 0
:) 8 1 0 0 1 051 0 0
:) 9 1 0 0 1 051 0 0
:) 10 1 0 0 1 052 0 0
:) # ℹ 59,536 more rows
colnames(wfh_status_20) <- c("dong_ep_myeon", "ind", "occup", "edu", "age", "flex_work_yesno", "flex_work_type")
wfh_status_20 <- wfh_status_20 %>%
mutate(ind = as.character(ind),
occup = as.character(occup),
edu = as.character(edu)) %>%
filter(ind != '0') %>%
filter(occup != '0') %>%
filter(edu != '0') %>%
filter(dong_ep_myeon == '1') %>%
mutate(age = as.numeric(age)) %>%
filter(age >= 20 & age < 61)
wfh_status_20 <- wfh_status_20 %>%
mutate(ind = recode(ind,
'A' = '농어업',
'B' = '광업',
'C' = '제조업',
'D' = '전기가스업',
'E' = '수도하수업',
'F' = '건설업',
'G' = '도소매업',
'H' = '운수창고업',
'I' = '숙박음식업',
'J' = '정보통신업',
'K' = '금융보험업',
'L' = '부동산업',
'M' = '전문과학기술업',
'N' = '사업지원업',
'O' = '공공행정',
'P' = '교육',
'Q' = '보건복지',
'R' = '예술스포츠여가',
'S' = '협회및개인서비스',
'T' = 'nu1',
'U' = 'nu2')) %>%
filter(!ind %in% c("nu1", "nu2")) %>%
filter(!ind %in% c("농어업", "광업", "수도하수업", "전기가스업")) %>%
mutate(occup = as.character(occup)) %>%
mutate(occup = recode(occup,
'1' = '관리직',
'2' = '전문직',
'3' = '사무직',
'4' = '서비스직',
'5' = '판매직',
'6' = '농어업숙련직',
'7' = '기능직',
'8' = '기계조작직',
'9' = '단순노무직')) %>%
mutate(edu = as.character(edu)) %>%
mutate(edu = recode(edu,
'1' = '초졸이하',
'2' = '중졸',
'3' = '고졸',
'4' = '초대졸',
'5' = '대졸',
'6' = '대학원졸')) %>%
mutate(edu_college = ifelse(edu %in% c('초졸이하', '중졸', '고졸', '초대졸'), '대졸미만', '대졸이상'))
wfh_status_20 <- wfh_status_20 %>%
mutate(flex_work_yesno = as.character(flex_work_yesno)) %>%
mutate(flex_work_yesno = recode(flex_work_yesno,
'0' = '없음',
'1' = '예',
'2' = '아니오')) %>%
mutate(flex_work_type = recode(flex_work_type,
'0' = '없음',
'1' = '근로시간단축근무제',
'2' = '시차출퇴근제',
'3' = '선택적근무시간제',
'4' = '재택및원격근무제',
'5' = '탄력적근무제',
'6' = '기타유형')) %>%
mutate(flex_work_type_isWFH = if_else(flex_work_type == '재택및원격근무제', 1, 0)) %>%
group_by(ind, edu_college) %>%
summarise(count = n(),
WFH = sum(flex_work_type_isWFH)) %>%
mutate(WFH_ratio_20 = WFH / count * 100)
wfh_status_20:) # A tibble: 30 × 5
:) # Groups: ind [15]
:) ind edu_college count WFH WFH_ratio_20
:) <chr> <chr> <int> <dbl> <dbl>
:) 1 건설업 대졸미만 1325 3 0.226
:) 2 건설업 대졸이상 425 2 0.471
:) 3 공공행정 대졸미만 317 3 0.946
:) 4 공공행정 대졸이상 683 26 3.81
:) 5 교육 대졸미만 401 3 0.748
:) 6 교육 대졸이상 1393 76 5.46
:) 7 금융보험업 대졸미만 305 9 2.95
:) 8 금융보험업 대졸이상 421 11 2.61
:) 9 도소매업 대졸미만 1940 6 0.309
:) 10 도소매업 대졸이상 935 19 2.03
:) # ℹ 20 more rows
3.2.3 2021
wfh_status_21 <- read_csv("data_mdis_econsurvey/8월_근로형태별_20221016_99633_데이터/2021_8월_근로형태별_20221016_99633.csv", locale=locale('ko',encoding='euc-kr'))
colnames(wfh_status_21):) [1] "동부읍면부코드" "교육정도_학력코드" "교육정도_계열코드" "현재일관련사항_10차산업대분류코드" "현재일관련사항_종사자규모코드" "현재일관련사항_7차직업대분류코드" "현재일관련사항_종사상지위코드" "교육정도컨버젼코드" "만연령" "근로장소코드" "유연근무제_활용여부" "유연근무제_활용형태1코드" "유연근무제_활용형태2코드"
:) [14] "유연근무제_향후활용예정형태1코드" "유연근무제_향후활용예정형태2코드" "연령계층코드" "가중값" "조사연월" "가구구분코드"
:) # A tibble: 58,882 × 7
:) 동부읍면부코드 현재일관련사항_10차산업대분류코드 현재일관련사항_7차직업대분류코드 교육정도컨버젼코드 만연령 유연근무제_활용여부 유연근무제_활용형태1코드
:) <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
:) 1 1 0 0 1 031 0 0
:) 2 1 0 0 1 032 0 0
:) 3 1 0 0 1 035 0 0
:) 4 1 0 0 1 039 0 0
:) 5 1 0 0 1 039 0 0
:) 6 1 0 0 1 040 0 0
:) 7 1 0 0 1 040 0 0
:) 8 1 0 0 1 047 0 0
:) 9 1 0 0 1 050 0 0
:) 10 1 0 0 1 052 0 0
:) # ℹ 58,872 more rows
colnames(wfh_status_21) <- c("dong_ep_myeon", "ind", "occup", "edu", "age", "flex_work_yesno", "flex_work_type")
wfh_status_21 <- wfh_status_21 %>%
mutate(ind = as.character(ind),
occup = as.character(occup),
edu = as.character(edu)) %>%
filter(ind != '0') %>%
filter(occup != '0') %>%
filter(edu != '0') %>%
filter(dong_ep_myeon == '1') %>%
mutate(age = as.numeric(age)) %>%
filter(age >= 20 & age < 61)
wfh_status_21 <- wfh_status_21 %>%
mutate(ind = recode(ind,
'A' = '농어업',
'B' = '광업',
'C' = '제조업',
'D' = '전기가스업',
'E' = '수도하수업',
'F' = '건설업',
'G' = '도소매업',
'H' = '운수창고업',
'I' = '숙박음식업',
'J' = '정보통신업',
'K' = '금융보험업',
'L' = '부동산업',
'M' = '전문과학기술업',
'N' = '사업지원업',
'O' = '공공행정',
'P' = '교육',
'Q' = '보건복지',
'R' = '예술스포츠여가',
'S' = '협회및개인서비스',
'T' = 'nu1',
'U' = 'nu2')) %>%
filter(!ind %in% c("nu1", "nu2")) %>%
filter(!ind %in% c("농어업", "광업", "수도하수업", "전기가스업")) %>%
mutate(occup = as.character(occup)) %>%
mutate(occup = recode(occup,
'1' = '관리직',
'2' = '전문직',
'3' = '사무직',
'4' = '서비스직',
'5' = '판매직',
'6' = '농어업숙련직',
'7' = '기능직',
'8' = '기계조작직',
'9' = '단순노무직')) %>%
mutate(edu = as.character(edu)) %>%
mutate(edu = recode(edu,
'1' = '초졸이하',
'2' = '중졸',
'3' = '고졸',
'4' = '초대졸',
'5' = '대졸',
'6' = '대학원졸')) %>%
mutate(edu_college = ifelse(edu %in% c('초졸이하', '중졸', '고졸', '초대졸'), '대졸미만', '대졸이상'))
wfh_status_21 <- wfh_status_21 %>%
mutate(flex_work_yesno = as.character(flex_work_yesno)) %>%
mutate(flex_work_yesno = recode(flex_work_yesno,
'0' = '없음',
'1' = '예',
'2' = '아니오')) %>%
mutate(flex_work_type = recode(flex_work_type,
'0' = '없음',
'1' = '근로시간단축근무제',
'2' = '시차출퇴근제',
'3' = '선택적근무시간제',
'4' = '재택및원격근무제',
'5' = '탄력적근무제',
'6' = '기타유형')) %>%
mutate(flex_work_type_isWFH = if_else(flex_work_type == '재택및원격근무제', 1, 0)) %>%
group_by(ind, edu_college) %>%
summarise(count = n(),
WFH = sum(flex_work_type_isWFH)) %>%
mutate(WFH_ratio_21 = WFH / count * 100)
wfh_status_21:) # A tibble: 30 × 5
:) # Groups: ind [15]
:) ind edu_college count WFH WFH_ratio_21
:) <chr> <chr> <int> <dbl> <dbl>
:) 1 건설업 대졸미만 1339 3 0.224
:) 2 건설업 대졸이상 426 11 2.58
:) 3 공공행정 대졸미만 307 11 3.58
:) 4 공공행정 대졸이상 713 47 6.59
:) 5 교육 대졸미만 420 6 1.43
:) 6 교육 대졸이상 1434 111 7.74
:) 7 금융보험업 대졸미만 315 22 6.98
:) 8 금융보험업 대졸이상 384 45 11.7
:) 9 도소매업 대졸미만 1847 6 0.325
:) 10 도소매업 대졸이상 906 31 3.42
:) # ℹ 20 more rows
3.2.4 stats
3.2.4.1 제조업
:) # A tibble: 2 × 5
:) ind edu_college count WFH WFH_ratio_21
:) <chr> <chr> <int> <dbl> <dbl>
:) 1 제조업 대졸미만 2437 16 0.657
:) 2 제조업 대졸이상 1176 45 3.83
wfh_status_21 %>%
ungroup() %>%
filter(ind == "제조업") %>%
ungroup() %>%
group_by(ind) %>%
summarise(count = sum(count),
WFH = sum(WFH)) %>%
mutate(WFH / count * 100):) # A tibble: 1 × 4
:) ind count WFH `WFH/count * 100`
:) <chr> <int> <dbl> <dbl>
:) 1 제조업 3613 61 1.69
3.2.4.2 금융보험업
:) # A tibble: 2 × 5
:) ind edu_college count WFH WFH_ratio_21
:) <chr> <chr> <int> <dbl> <dbl>
:) 1 금융보험업 대졸미만 315 22 6.98
:) 2 금융보험업 대졸이상 384 45 11.7
wfh_status_21 %>%
ungroup() %>%
filter(ind == "금융보험업") %>%
ungroup() %>%
group_by(ind) %>%
summarise(count = sum(count),
WFH = sum(WFH)) %>%
mutate(WFH / count * 100):) # A tibble: 1 × 4
:) ind count WFH `WFH/count * 100`
:) <chr> <int> <dbl> <dbl>
:) 1 금융보험업 699 67 9.59
3.2.4.3 도소매업
:) # A tibble: 2 × 5
:) ind edu_college count WFH WFH_ratio_21
:) <chr> <chr> <int> <dbl> <dbl>
:) 1 도소매업 대졸미만 1847 6 0.325
:) 2 도소매업 대졸이상 906 31 3.42
wfh_status_21 %>%
ungroup() %>%
filter(ind == "도소매업") %>%
ungroup() %>%
group_by(ind) %>%
summarise(count = sum(count),
WFH = sum(WFH)) %>%
mutate(WFH / count * 100):) # A tibble: 1 × 4
:) ind count WFH `WFH/count * 100`
:) <chr> <int> <dbl> <dbl>
:) 1 도소매업 2753 37 1.34
3.2.4.4 숙박음식업
:) # A tibble: 2 × 5
:) ind edu_college count WFH WFH_ratio_21
:) <chr> <chr> <int> <dbl> <dbl>
:) 1 숙박음식업 대졸미만 1328 0 0
:) 2 숙박음식업 대졸이상 367 2 0.545
wfh_status_21 %>%
ungroup() %>%
filter(ind == "숙박음식업") %>%
ungroup() %>%
group_by(ind) %>%
summarise(count = sum(count),
WFH = sum(WFH)) %>%
mutate(WFH / count * 100):) # A tibble: 1 × 4
:) ind count WFH `WFH/count * 100`
:) <chr> <int> <dbl> <dbl>
:) 1 숙박음식업 1695 2 0.118
3.2.4.5 한번에
wfh_status_21 %>%
ungroup() %>%
group_by(ind) %>%
summarise(count = sum(count),
WFH = sum(WFH)) %>%
mutate(WFH / count * 100) %>%
arrange(`WFH/count * 100`):) # A tibble: 15 × 4
:) ind count WFH `WFH/count * 100`
:) <chr> <int> <dbl> <dbl>
:) 1 숙박음식업 1695 2 0.118
:) 2 협회및개인서비스 925 6 0.649
:) 3 보건복지 1997 13 0.651
:) 4 건설업 1765 14 0.793
:) 5 예술스포츠여가 381 5 1.31
:) 6 도소매업 2753 37 1.34
:) 7 부동산업 365 6 1.64
:) 8 제조업 3613 61 1.69
:) 9 운수창고업 1127 20 1.77
:) 10 사업지원업 966 25 2.59
:) 11 공공행정 1020 58 5.69
:) 12 교육 1854 117 6.31
:) 13 전문과학기술업 1011 67 6.63
:) 14 금융보험업 699 67 9.59
:) 15 정보통신업 748 84 11.2
3.2.5 sum up
wfh_status_19202122 <- wfh_status_19 %>%
left_join(wfh_status_20, by = c('ind', 'edu_college')) %>%
left_join(wfh_status_21, by = c('ind', 'edu_college')) %>%
select(ind, edu_college, WFH_ratio_19, WFH_ratio_20, WFH_ratio_21) %>%
rename(r2019 = WFH_ratio_19,
r2020 = WFH_ratio_20,
r2021 = WFH_ratio_21) %>%
pivot_longer(3:5, names_to = 'year', values_to = 'WFH_ratio') %>%
ungroup()
wfh_status_19202122:) # A tibble: 90 × 4
:) ind edu_college year WFH_ratio
:) <chr> <chr> <chr> <dbl>
:) 1 건설업 대졸미만 r2019 0
:) 2 건설업 대졸미만 r2020 0.226
:) 3 건설업 대졸미만 r2021 0.224
:) 4 건설업 대졸이상 r2019 0
:) 5 건설업 대졸이상 r2020 0.471
:) 6 건설업 대졸이상 r2021 2.58
:) 7 공공행정 대졸미만 r2019 0
:) 8 공공행정 대졸미만 r2020 0.946
:) 9 공공행정 대졸미만 r2021 3.58
:) 10 공공행정 대졸이상 r2019 0.303
:) # ℹ 80 more rows
wfh_status_19202122 <- wfh_status_19202122 %>%
mutate(year = recode(year,
'r2019' = '2019',
'r2020' = '2020',
'r2021' = '2021'
))
wfh_status_19202122:) # A tibble: 90 × 4
:) ind edu_college year WFH_ratio
:) <chr> <chr> <chr> <dbl>
:) 1 건설업 대졸미만 2019 0
:) 2 건설업 대졸미만 2020 0.226
:) 3 건설업 대졸미만 2021 0.224
:) 4 건설업 대졸이상 2019 0
:) 5 건설업 대졸이상 2020 0.471
:) 6 건설업 대졸이상 2021 2.58
:) 7 공공행정 대졸미만 2019 0
:) 8 공공행정 대졸미만 2020 0.946
:) 9 공공행정 대졸미만 2021 3.58
:) 10 공공행정 대졸이상 2019 0.303
:) # ℹ 80 more rows
4 representation of two modes
4.1 origin
4.1.1 import
kosis_ingu_edu_emd_2020 <- readxl::read_excel('data_residential/kosis_ingu_edu_emd_2020.xlsx')
colnames(kosis_ingu_edu_emd_2020) <- c("adm_cd", "adm_nm", "nu1", "nu2", "nu3", "age", "nu4", "edu_level",
"nu5", "nu6", "nu7", "count")
kosis_ingu_edu_emd_2020 <- kosis_ingu_edu_emd_2020 %>%
slice(-1) %>%
select(adm_cd, adm_nm, age, edu_level, count) %>%
filter(str_sub(adm_cd, 1, 5) != "23320") %>% # 23320이 옹진군임
filter(as.numeric(adm_cd) > 0 & str_length(adm_cd) > 5) %>%
filter(str_sub(adm_cd, 1, 2) %in% c("11", "23", "31")) %>%
mutate(sgg_cd = str_sub(adm_cd, 1, 5)) %>%
left_join(kosis_sgg, by = "sgg_cd") %>%
select(-sgg_cd) %>%
mutate(adm_nm = str_c(sgg_nm, "_", adm_nm))
kosis_ingu_edu_emd_2020:) # A tibble: 35,776 × 6
:) adm_cd adm_nm age edu_level count sgg_nm
:) <chr> <chr> <chr> <chr> <chr> <chr>
:) 1 1101053 종로구_사직동 20-29세 계 1068 종로구
:) 2 1101053 종로구_사직동 20-29세 초등학교 <NA> 종로구
:) 3 1101053 종로구_사직동 20-29세 중학교 <NA> 종로구
:) 4 1101053 종로구_사직동 20-29세 고등학교 118 종로구
:) 5 1101053 종로구_사직동 20-29세 대학교(2,3년제) 162 종로구
:) 6 1101053 종로구_사직동 20-29세 대학교(4년제 이상) 671 종로구
:) 7 1101053 종로구_사직동 20-29세 대학원(석박사 과정) 117 종로구
:) 8 1101053 종로구_사직동 20-29세 받지 않았음(미취학 포함) <NA> 종로구
:) 9 1101053 종로구_사직동 30-39세 계 1266 종로구
:) 10 1101053 종로구_사직동 30-39세 초등학교 <NA> 종로구
:) # ℹ 35,766 more rows
kosis_ingu_edu_emd_2020 <- kosis_ingu_edu_emd_2020 %>%
mutate(count = as.numeric(count)) %>%
mutate(count = replace_na(count, 0)) %>%
group_by(adm_cd, adm_nm, edu_level) %>% # 나이는 모두 20~59세 사이로 한정함.
summarise(count = sum(count)) %>%
filter(edu_level %in% c("계", "대학교(4년제 이상)", "대학원(석박사 과정)")) %>%
pivot_wider(names_from = edu_level,
values_from = count) %>%
ungroup() %>%
mutate(college = (`대학교(4년제 이상)` + `대학원(석박사 과정)`) / `계`) %>%
select(adm_cd, adm_nm, college, 계) %>%
rename(pop = 계) %>%
ungroup()
kosis_ingu_edu_emd_2020:) # A tibble: 1,118 × 4
:) adm_cd adm_nm college pop
:) <chr> <chr> <dbl> <dbl>
:) 1 1101053 종로구_사직동 0.808 5034
:) 2 1101054 종로구_삼청동 0.706 1328
:) 3 1101055 종로구_부암동 0.708 5471
:) 4 1101056 종로구_평창동 0.779 9791
:) 5 1101057 종로구_무악동 0.780 4532
:) 6 1101058 종로구_교남동 0.675 6043
:) 7 1101060 종로구_가회동 0.603 2300
:) 8 1101061 종로구_종로1·2·3·4가동 0.682 3543
:) 9 1101063 종로구_종로5·6가동 0.537 3163
:) 10 1101064 종로구_이화동 0.645 5230
:) # ℹ 1,108 more rows
kosis_ingu_edu_emd_2020 <- kosis_ingu_edu_emd_2020 %>%
mutate(adm_cd = replace(adm_cd, adm_nm == "구로구_오류2동", "1117068"), # 생활이동데이터에 기반한 멤버십에 항동이 따로 없으므로 이 단계에서는 필수적으로 이렇게 해야 함.
adm_cd = replace(adm_cd, adm_nm == "구로구_항동", "1117068"),
adm_nm = replace(adm_nm, adm_nm == "구로구_항동", "구로구_오류2동")) %>%
mutate(adm_nm = stringr::str_replace_all(adm_nm, "·", ".")) %>%
group_by(adm_nm, adm_cd) %>%
summarise(college = mean(college)) %>%
ungroup() %>%
arrange(adm_cd)
kosis_ingu_edu_emd_2020:) # A tibble: 1,117 × 3
:) adm_nm adm_cd college
:) <chr> <chr> <dbl>
:) 1 종로구_사직동 1101053 0.808
:) 2 종로구_삼청동 1101054 0.706
:) 3 종로구_부암동 1101055 0.708
:) 4 종로구_평창동 1101056 0.779
:) 5 종로구_무악동 1101057 0.780
:) 6 종로구_교남동 1101058 0.675
:) 7 종로구_가회동 1101060 0.603
:) 8 종로구_종로1.2.3.4가동 1101061 0.682
:) 9 종로구_종로5.6가동 1101063 0.537
:) 10 종로구_이화동 1101064 0.645
:) # ℹ 1,107 more rows
:) adm_nm adm_cd college
:) 0 0 0
4.1.2 join to sf
4.1.2.1 matched
dong.sf_resid_tb_1 <- dong.sf %>%
st_drop_geometry() %>%
left_join(kosis_ingu_edu_emd_2020, by = "adm_nm") %>% # 코드 다른 게 있을까봐 이름만을 기준으로로
filter(!is.na(college)) # 다행히도 안 중요한 지역들이 매칭 안됨.
dong.sf_resid_tb_1 :) # A tibble: 1,116 × 4
:) adm_nm adm_cd.x adm_cd.y college
:) <chr> <chr> <chr> <dbl>
:) 1 종로구_사직동 1101053 1101053 0.808
:) 2 종로구_삼청동 1101054 1101054 0.706
:) 3 종로구_부암동 1101055 1101055 0.708
:) 4 종로구_평창동 1101056 1101056 0.779
:) 5 종로구_무악동 1101057 1101057 0.780
:) 6 종로구_교남동 1101058 1101058 0.675
:) 7 종로구_가회동 1101060 1101060 0.603
:) 8 종로구_종로1.2.3.4가동 1101061 1101061 0.682
:) 9 종로구_종로5.6가동 1101063 1101063 0.537
:) 10 종로구_이화동 1101064 1101064 0.645
:) # ℹ 1,106 more rows
4.1.2.2 unmatched(imputation)
dong.sf_resid_tb_2 <- dong.sf %>%
st_drop_geometry() %>%
left_join(kosis_ingu_edu_emd_2020, by = "adm_nm") %>% # 코드 다른 게 있을까봐 이름만을 기준으로로
filter(is.na(college)) # 다행히도 안 중요한 지역들이 매칭 안됨.
dong.sf_resid_tb_2:) # A tibble: 7 × 4
:) adm_nm adm_cd.x adm_cd.y college
:) <chr> <chr> <chr> <dbl>
:) 1 파주시_진동면 3120021 <NA> NA
:) 2 파주시_장단면 3120026 <NA> NA
:) 3 파주시_진서면 3120027 <NA> NA
:) 4 광주시_쌍령동 3125056 <NA> NA
:) 5 광주시_탄벌동 3125057 <NA> NA
:) 6 광주시_광남1동 3125058 <NA> NA
:) 7 광주시_광남2동 3125059 <NA> NA
mean_paju <- kosis_ingu_edu_emd_2020 %>%
filter(str_sub(adm_nm, 1, 3) == "파주시") %>%
summarise(college = mean(college)) %>%
as.numeric()
mean_paju:) [1] 0.326
mean_gwangju <- kosis_ingu_edu_emd_2020 %>%
filter(str_sub(adm_nm, 1, 3) == "광주시") %>%
summarise(college = mean(college)) %>%
as.numeric()
mean_gwangju:) [1] 0.327
dong.sf_resid_tb_2 <- dong.sf_resid_tb_2 %>%
mutate(college = ifelse(str_sub(adm_nm, 1, 3) == "파주시", mean_paju, mean_gwangju))
dong.sf_resid_tb_2:) # A tibble: 7 × 4
:) adm_nm adm_cd.x adm_cd.y college
:) <chr> <chr> <chr> <dbl>
:) 1 파주시_진동면 3120021 <NA> 0.326
:) 2 파주시_장단면 3120026 <NA> 0.326
:) 3 파주시_진서면 3120027 <NA> 0.326
:) 4 광주시_쌍령동 3125056 <NA> 0.327
:) 5 광주시_탄벌동 3125057 <NA> 0.327
:) 6 광주시_광남1동 3125058 <NA> 0.327
:) 7 광주시_광남2동 3125059 <NA> 0.327
4.1.2.3 integrate
:) # A tibble: 1,123 × 4
:) adm_nm adm_cd.x adm_cd.y college
:) <chr> <chr> <chr> <dbl>
:) 1 종로구_사직동 1101053 1101053 0.808
:) 2 종로구_삼청동 1101054 1101054 0.706
:) 3 종로구_부암동 1101055 1101055 0.708
:) 4 종로구_평창동 1101056 1101056 0.779
:) 5 종로구_무악동 1101057 1101057 0.780
:) 6 종로구_교남동 1101058 1101058 0.675
:) 7 종로구_가회동 1101060 1101060 0.603
:) 8 종로구_종로1.2.3.4가동 1101061 1101061 0.682
:) 9 종로구_종로5.6가동 1101063 1101063 0.537
:) 10 종로구_이화동 1101064 1101064 0.645
:) # ℹ 1,113 more rows
:) adm_nm adm_cd.x adm_cd.y college
:) 0 0 7 0
:) # A tibble: 9 × 4
:) adm_nm adm_cd.x adm_cd.y college
:) <chr> <chr> <chr> <dbl>
:) 1 광주시_경안동 3125054 3125051 0.319
:) 2 광주시_송정동 3125055 3125052 0.305
:) 3 파주시_진동면 3120021 <NA> 0.326
:) 4 파주시_장단면 3120026 <NA> 0.326
:) 5 파주시_진서면 3120027 <NA> 0.326
:) 6 광주시_쌍령동 3125056 <NA> 0.327
:) 7 광주시_탄벌동 3125057 <NA> 0.327
:) 8 광주시_광남1동 3125058 <NA> 0.327
:) 9 광주시_광남2동 3125059 <NA> 0.327
dong.sf_resid_tb <- dong.sf_resid_tb %>%
select(-adm_cd.y) %>%
rename(adm_cd = adm_cd.x)
dong.sf_resid_tb:) # A tibble: 1,123 × 3
:) adm_nm adm_cd college
:) <chr> <chr> <dbl>
:) 1 종로구_사직동 1101053 0.808
:) 2 종로구_삼청동 1101054 0.706
:) 3 종로구_부암동 1101055 0.708
:) 4 종로구_평창동 1101056 0.779
:) 5 종로구_무악동 1101057 0.780
:) 6 종로구_교남동 1101058 0.675
:) 7 종로구_가회동 1101060 0.603
:) 8 종로구_종로1.2.3.4가동 1101061 0.682
:) 9 종로구_종로5.6가동 1101063 0.537
:) 10 종로구_이화동 1101064 0.645
:) # ℹ 1,113 more rows
dong.sf_resid <- dong.sf %>%
left_join(dong.sf_resid_tb, by = c("adm_nm", "adm_cd")) # 여기서는 코드까지 맞춰도 다 맞음
dong.sf_resid:) Simple feature collection with 1123 features and 3 fields
:) Geometry type: GEOMETRY
:) Dimension: XY
:) Bounding box: xmin: 865000 ymin: 1880000 xmax: 1030000 ymax: 2030000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 1,123 × 4
:) adm_nm adm_cd geometry college
:) <chr> <chr> <MULTIPOLYGON [m]> <dbl>
:) 1 종로구_사직동 1101053 (((953554 1953336, 953555 1953320, 953556 1953307, 953557 1953295, 953558 1953281, 9... 0.808
:) 2 종로구_삼청동 1101054 (((953844 1955492, 953859 1955490, 953902 1955493, 953912 1955493, 953916 1955492, 9... 0.706
:) 3 종로구_부암동 1101055 (((952490 1956549, 952498 1956533, 952501 1956525, 952501 1956524, 952492 1956515, 9... 0.708
:) 4 종로구_평창동 1101056 (((953684 1959210, 953665 1959132, 953647 1959057, 953651 1959043, 953672 1958971, 9... 0.779
:) 5 종로구_무악동 1101057 (((952298 1953540, 952325 1953508, 952329 1953500, 952338 1953484, 952339 1953482, 9... 0.780
:) 6 종로구_교남동 1101058 (((952572 1953259, 952573 1953256, 952575 1953250, 952577 1953241, 952580 1953234, 9... 0.675
:) 7 종로구_가회동 1101060 (((954895 1954615, 954888 1954592, 954865 1954592, 954856 1954592, 954838 1954563, 9... 0.603
:) 8 종로구_종로1.2.3.4가동 1101061 (((954918 1954372, 954926 1954362, 954932 1954355, 954937 1954352, 954949 1954346, 9... 0.682
:) 9 종로구_종로5.6가동 1101063 (((956607 1953150, 956607 1953148, 956607 1953146, 956607 1953144, 956607 1953139, 9... 0.537
:) 10 종로구_이화동 1101064 (((956366 1954112, 956372 1954108, 956379 1954108, 956379 1954108, 956408 1954108, 9... 0.645
:) # ℹ 1,113 more rows
:) adm_nm adm_cd geometry college
:) 0 0 0 0
4.1.3 figure 4a
:) Sparse geometry binary predicate list of length 1123, where the predicate was `intersects'
:) first 10 elements:
:) 1: 1, 5, 6, 8, 16, 18, 20
:) 2: 2, 3, 7, 8, 16, 17, 126
:) 3: 2, 3, 4, 16, 126, 195, 197
:) 4: 3, 4, 117, 118, 126, 178, 189, 197, 801
:) 5: 1, 5, 6, 16, 193, 194, 196
:) 6: 1, 5, 6, 18, 193, 203
:) 7: 2, 7, 8, 17
:) 8: 1, 2, 7, 8, 9, 10, 16, 17, 20, 24
:) 9: 8, 9, 10, 11, 12, 23, 24, 28
:) 10: 8, 9, 10, 12, 17, 127
:) [1] 7 7 7 9 7 6 4 10 8 6 8 6 5 6 6 8 7 7 8 7 8 8 6 6 6 7 6 10 7 6 6 8 5 9 7 7 6 5 8 6 6 5 10 5 7 7 8 10 7 6 7 7 5 7 7 7 8 7 5 7 9 8 7 7 8 9 5 7 5 6 8 8 7 7 7
:) [ reached getOption("max.print") -- omitted 1048 entries ]
:) [1] 1
:) Simple feature collection with 1123 features and 4 fields
:) Geometry type: GEOMETRY
:) Dimension: XY
:) Bounding box: xmin: 865000 ymin: 1880000 xmax: 1030000 ymax: 2030000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 1,123 × 5
:) adm_nm adm_cd geometry college num_of_neighbors
:) * <chr> <chr> <MULTIPOLYGON [m]> <dbl> <int>
:) 1 종로구_사직동 1101053 (((953554 1953336, 953555 1953320, 953556 1953307, 953557 1953295, 953558 1953281, 9... 0.808 7
:) 2 종로구_삼청동 1101054 (((953844 1955492, 953859 1955490, 953902 1955493, 953912 1955493, 953916 1955492, 9... 0.706 7
:) 3 종로구_부암동 1101055 (((952490 1956549, 952498 1956533, 952501 1956525, 952501 1956524, 952492 1956515, 9... 0.708 7
:) 4 종로구_평창동 1101056 (((953684 1959210, 953665 1959132, 953647 1959057, 953651 1959043, 953672 1958971, 9... 0.779 9
:) 5 종로구_무악동 1101057 (((952298 1953540, 952325 1953508, 952329 1953500, 952338 1953484, 952339 1953482, 9... 0.780 7
:) 6 종로구_교남동 1101058 (((952572 1953259, 952573 1953256, 952575 1953250, 952577 1953241, 952580 1953234, 9... 0.675 6
:) 7 종로구_가회동 1101060 (((954895 1954615, 954888 1954592, 954865 1954592, 954856 1954592, 954838 1954563, 9... 0.603 4
:) 8 종로구_종로1.2.3.4가동 1101061 (((954918 1954372, 954926 1954362, 954932 1954355, 954937 1954352, 954949 1954346, 9... 0.682 10
:) 9 종로구_종로5.6가동 1101063 (((956607 1953150, 956607 1953148, 956607 1953146, 956607 1953144, 956607 1953139, 9... 0.537 8
:) 10 종로구_이화동 1101064 (((956366 1954112, 956372 1954108, 956379 1954108, 956379 1954108, 956408 1954108, 9... 0.645 6
:) # ℹ 1,113 more rows
4.2 destination
4.2.1 import
# 여기도 항동 없음
jongsaja <- readxl::read_xls("data_industrial/economic_activity_survey_by_industry.xls")
colnames(jongsaja) <- c("sgg_nm", "adm_nm", "total_employ",
'농어업', '광업', '제조업', '전기가스업', '수도하수업', '건설업', '도소매업', '운수창고업', '숙박음식업', '정보통신업', '금융보험업', '부동산업', '전문과학기술업', '사업지원업', '공공행정', '교육', '보건복지', '예술스포츠여가', '협회및개인서비스') jongsaja <- jongsaja %>%
slice(-1) %>%
filter(adm_nm != "소계") %>%
mutate_at(4:22, as.numeric) %>%
mutate_at(4:22, tidyr::replace_na, 0) %>%
mutate(adm_nm = stringr::str_replace_all(adm_nm, "·", "."),
adm_nm = replace(adm_nm, adm_nm=="여의도동", "여의동")) %>%
mutate(adm_nm = str_c(sgg_nm, "_", adm_nm)) %>%
select(-c(sgg_nm, total_employ))
colSums(is.na(jongsaja)):) adm_nm 농어업 광업 제조업 전기가스업 수도하수업 건설업 도소매업 운수창고업 숙박음식업 정보통신업 금융보험업 부동산업 전문과학기술업 사업지원업 공공행정 교육 보건복지 예술스포츠여가 협회및개인서비스
:) 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
:) adm_nm 농어업 광업 제조업 전기가스업 수도하수업 건설업 도소매업 운수창고업 숙박음식업 정보통신업 금융보험업 부동산업 전문과학기술업 사업지원업 공공행정 교육 보건복지 예술스포츠여가 협회및개인서비스
:) 1 종로구_사직동 12 2 359 11 34 688 2726 2513 4124 2133 7794 1091 8098 7408 6088 786 1096 1653 906
:) 2 종로구_삼청동 0 0 40 0 0 57 910 93 874 76 63 35 348 108 1291 297 30 185 194
:) 3 종로구_부암동 0 1 158 0 0 76 550 39 394 126 29 91 110 73 60 1190 183 49 325
:) [ reached 'max' / getOption("max.print") -- omitted 421 rows ]
4.2.2 aggregate by community
b <- dong.sf %>%
filter(str_sub(adm_cd, 1, 2) == "11") %>%
st_drop_geometry() %>%
select(adm_cd, adm_nm)
jongsaja %>%
left_join(b, by = c("adm_nm" = "adm_nm")) %>%
relocate(adm_cd, adm_nm) %>%
is.na() %>%
colSums():) adm_cd adm_nm 농어업 광업 제조업 전기가스업 수도하수업 건설업 도소매업 운수창고업 숙박음식업 정보통신업 금융보험업 부동산업 전문과학기술업 사업지원업 공공행정 교육 보건복지 예술스포츠여가 협회및개인서비스
:) 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
jongsaja_comm <- jongsaja %>%
left_join(b, by = "adm_nm") %>%
relocate(adm_cd, adm_nm) %>%
left_join(membership_info_eng, by = "adm_cd") %>%
relocate(adm_cd, member, name) %>%
group_by(name) %>%
summarise(across(is.numeric, ~ sum(.x, na.rm = TRUE))) %>%
select(-c("건설업", "농어업", "광업", "수도하수업", "전기가스업")) #건설업 집계 못 믿을만함
jongsaja_comm:) # A tibble: 54 × 15
:) name 제조업 도소매업 운수창고업 숙박음식업 정보통신업 금융보험업 부동산업 전문과학기술업 사업지원업 공공행정 교육 보건복지 예술스포츠여가 협회및개인서비스
:) <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
:) 1 Anam 3079 9266 3677 9318 1131 1057 2984 2209 3913 2916 16613 9693 1628 3562
:) 2 Balsan 2733 10981 5718 6454 1026 1369 1579 1670 3434 2375 6081 8513 1386 2901
:) 3 Bangbae 1730 14864 3598 9023 5486 4439 4306 7100 15193 1055 6795 5852 1712 4631
:) 4 Banghak 3657 10353 8533 7833 645 1086 2678 1689 2019 3840 6207 11065 1677 4260
:) 5 Bangi 2631 10265 1819 6173 2125 992 2141 3624 9561 1119 7238 11880 2185 2045
:) 6 Banpo 840 11117 2459 7518 2434 1076 2825 4530 4071 674 5285 6986 2309 1906
:) 7 Changshin 6232 15381 1587 3928 1920 2773 1159 1251 2170 531 938 1565 519 1484
:) 8 Cheongdam 6649 35218 2281 16725 12338 5342 7477 25307 16321 3234 5466 8937 1994 8601
:) 9 Chunghyeon 1002 4467 1088 2405 2293 5324 745 3589 3625 3030 2323 1035 263 1411
:) 10 Daechi 3914 23571 2861 9480 9407 9381 6669 15237 12570 469 10135 7578 1870 3491
:) # ℹ 44 more rows
Manufacturing (MF) Wholesale and retail trade (WR) Transportation and storage (TS) Accommodation and food service activities (AF) Information and communication (IC) Financial and insurance activities (FI) Real estate activities (RE) Professional, scientific and technical activities (PT) Business facilities management and business support services; rental and leasing activities (BF) Public administration and defence; compulsory social security (PA) Education (EC) Human health and social work activities (HS) Arts, sports and recreation related services (RS) Membership organizations, repair and other personal services (PS)
colnames(jongsaja_comm) <- c("name", "MF", "WR", "TS", "AF", "IC", "FI", "RE", "PT", "BF", "PA", "EC", "HS", "RS", "PS"):) # A tibble: 54 × 15
:) name MF WR TS AF IC FI RE PT BF PA EC HS RS PS
:) <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
:) 1 Anam 3079 9266 3677 9318 1131 1057 2984 2209 3913 2916 16613 9693 1628 3562
:) 2 Balsan 2733 10981 5718 6454 1026 1369 1579 1670 3434 2375 6081 8513 1386 2901
:) 3 Bangbae 1730 14864 3598 9023 5486 4439 4306 7100 15193 1055 6795 5852 1712 4631
:) 4 Banghak 3657 10353 8533 7833 645 1086 2678 1689 2019 3840 6207 11065 1677 4260
:) 5 Bangi 2631 10265 1819 6173 2125 992 2141 3624 9561 1119 7238 11880 2185 2045
:) 6 Banpo 840 11117 2459 7518 2434 1076 2825 4530 4071 674 5285 6986 2309 1906
:) 7 Changshin 6232 15381 1587 3928 1920 2773 1159 1251 2170 531 938 1565 519 1484
:) 8 Cheongdam 6649 35218 2281 16725 12338 5342 7477 25307 16321 3234 5466 8937 1994 8601
:) 9 Chunghyeon 1002 4467 1088 2405 2293 5324 745 3589 3625 3030 2323 1035 263 1411
:) 10 Daechi 3914 23571 2861 9480 9407 9381 6669 15237 12570 469 10135 7578 1870 3491
:) # ℹ 44 more rows
4.2.3 filter out using threshold
4.2.3.1 employment numbers and density of each employment district
jongsaja_comm_rowsums <- jongsaja_comm %>%
column_to_rownames("name") %>%
as.data.frame() %>%
rowSums()
jongsaja_comm_rowsums <- cbind(jongsaja_comm_rowsums, jongsaja_comm$name) %>%
as_tibble()
colnames(jongsaja_comm_rowsums) <- c("total_emp", "name")
jongsaja_comm_rowsums <- jongsaja_comm_rowsums %>%
mutate(total_emp = as.numeric(total_emp))
jongsaja_comm_rowsums:) # A tibble: 54 × 2
:) total_emp name
:) <dbl> <chr>
:) 1 71046 Anam
:) 2 56220 Balsan
:) 3 85784 Bangbae
:) 4 65542 Banghak
:) 5 63798 Bangi
:) 6 54030 Banpo
:) 7 41438 Changshin
:) 8 155890 Cheongdam
:) 9 32600 Chunghyeon
:) 10 116633 Daechi
:) # ℹ 44 more rows
dong.sf_commune <- dong.sf_commune %>%
left_join(jongsaja_comm_rowsums, by = "name") %>%
mutate(density_emp = total_emp / as.numeric(area) * 10000) %>% # 1m^2가 아닌 100m^2 당 고용인구
mutate(log_total_emp = log(total_emp),
log_density_emp = log(density_emp))
dong.sf_commune:) Simple feature collection with 54 features and 6 fields
:) Geometry type: GEOMETRY
:) Dimension: XY
:) Bounding box: xmin: 935000 ymin: 1940000 xmax: 972000 ymax: 1970000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 54 × 7
:) name geometry area total_emp density_emp log_total_emp log_density_emp
:) * <chr> <POLYGON [m]> [m^2] <dbl> <dbl> <dbl> <dbl>
:) 1 Anam ((957987 1953401, 957976 1953392, 957958 1953376, 957944 1953364, 957936 1953357, 957909 ... 15774236. 71046 45.0 11.2 3.81
:) 2 Balsan ((941113 1947544, 941103 1947542, 941103 1947542, 941062 1947534, 941061 1947533, 941057 ... 8854094. 56220 63.5 10.9 4.15
:) 3 Bangbae ((954771 1940006, 954755 1939998, 954750 1939996, 954733 1939987, 954711 1939975, 954653 ... 15289435. 85784 56.1 11.4 4.03
:) 4 Banghak ((959860 1959717, 959857 1959710, 959854 1959705, 959858 1959697, 959852 1959692, 959853 ... 19514974. 65542 33.6 11.1 3.51
:) 5 Bangi ((968192 1945526, 968204 1945515, 968204 1945515, 968210 1945508, 968211 1945506, 968211 ... 10361966. 63798 61.6 11.1 4.12
:) 6 Banpo ((956728 1945168, 956728 1945168, 956729 1945167, 956729 1945166, 956730 1945165, 956730 ... 6799150. 54030 79.5 10.9 4.38
:) 7 Changshin ((956717 1952379, 956714 1952379, 956710 1952379, 956708 1952379, 956703 1952379, 956701 ... 1972479. 41438 210. 10.6 5.35
:) 8 Cheongdam ((960137 1945124, 960129 1945145, 960128 1945146, 960111 1945193, 960109 1945199, 960100 ... 7369461. 155890 212. 12.0 5.35
:) 9 Chunghyeon ((953195 1951524, 953196 1951519, 953197 1951516, 953199 1951516, 953206 1951503, 953207 ... 2057803. 32600 158. 10.4 5.07
:) 10 Daechi ((961579 1941234, 961557 1941223, 961545 1941217, 961524 1941214, 961492 1941208, 961476 ... 10038396. 116633 116. 11.7 4.76
:) # ℹ 44 more rows
top_dest <- read_csv("data_industrial/data_topdestinations_20230608.csv") %>%
mutate(W_commune_nm = ifelse(W_commune_nm == "Myeong", "Myeongdong", W_commune_nm))
top_dest:) # A tibble: 54 × 2
:) W_commune_nm total_inflow
:) <chr> <dbl>
:) 1 Yeoksam 2447083.
:) 2 Guro 2382391.
:) 3 Myeongdong 2325144.
:) 4 Jongno 1775598.
:) 5 Yeoui 1738367.
:) 6 Cheongdam 1519280.
:) 7 Seongsu 1363683.
:) 8 Yeongdeungpo 1296826.
:) 9 Sinchon 1278512.
:) 10 Daechi 1217771.
:) # ℹ 44 more rows
dong.sf_commune <- dong.sf_commune %>%
left_join(top_dest, by = c("name" = "W_commune_nm"))
dong.sf_commune :) Simple feature collection with 54 features and 7 fields
:) Geometry type: GEOMETRY
:) Dimension: XY
:) Bounding box: xmin: 935000 ymin: 1940000 xmax: 972000 ymax: 1970000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 54 × 8
:) name geometry area total_emp density_emp log_total_emp log_density_emp total_inflow
:) <chr> <POLYGON [m]> [m^2] <dbl> <dbl> <dbl> <dbl> <dbl>
:) 1 Anam ((957987 1953401, 957976 1953392, 957958 1953376, 957944 1953364, 957936 1953357, 957909 ... 15774236. 71046 45.0 11.2 3.81 536318.
:) 2 Balsan ((941113 1947544, 941103 1947542, 941103 1947542, 941062 1947534, 941061 1947533, 941057 ... 8854094. 56220 63.5 10.9 4.15 337574.
:) 3 Bangbae ((954771 1940006, 954755 1939998, 954750 1939996, 954733 1939987, 954711 1939975, 954653 ... 15289435. 85784 56.1 11.4 4.03 594415.
:) 4 Banghak ((959860 1959717, 959857 1959710, 959854 1959705, 959858 1959697, 959852 1959692, 959853 ... 19514974. 65542 33.6 11.1 3.51 386032.
:) 5 Bangi ((968192 1945526, 968204 1945515, 968204 1945515, 968210 1945508, 968211 1945506, 968211 ... 10361966. 63798 61.6 11.1 4.12 534683.
:) 6 Banpo ((956728 1945168, 956728 1945168, 956729 1945167, 956729 1945166, 956730 1945165, 956730 ... 6799150. 54030 79.5 10.9 4.38 459624.
:) 7 Changshin ((956717 1952379, 956714 1952379, 956710 1952379, 956708 1952379, 956703 1952379, 956701 ... 1972479. 41438 210. 10.6 5.35 380417.
:) 8 Cheongdam ((960137 1945124, 960129 1945145, 960128 1945146, 960111 1945193, 960109 1945199, 960100 ... 7369461. 155890 212. 12.0 5.35 1519280.
:) 9 Chunghyeon ((953195 1951524, 953196 1951519, 953197 1951516, 953199 1951516, 953206 1951503, 953207 ... 2057803. 32600 158. 10.4 5.07 917891.
:) 10 Daechi ((961579 1941234, 961557 1941223, 961545 1941217, 961524 1941214, 961492 1941208, 961476 ... 10038396. 116633 116. 11.7 4.76 1217771.
:) # ℹ 44 more rows
4.2.3.2 filter out
THRESHOLD = 0.73
#THRESHOLD = 0.5
#THRESHOLD = 0.8
#THRESHOLD = 0.9
dong.sf_commune_filtered <- dong.sf_commune %>%
filter(total_inflow >= quantile(total_inflow, THRESHOLD)) # 이런 거 조심!!!!
dong.sf_commune_filtered %>%
relocate(name, total_inflow, total_emp, density_emp) %>%
arrange(desc(total_inflow)):) Simple feature collection with 15 features and 7 fields
:) Geometry type: GEOMETRY
:) Dimension: XY
:) Bounding box: xmin: 944000 ymin: 1940000 xmax: 970000 ymax: 1960000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 15 × 8
:) name total_inflow total_emp density_emp geometry area log_total_emp log_density_emp
:) <chr> <dbl> <dbl> <dbl> <GEOMETRY [m]> [m^2] <dbl> <dbl>
:) 1 Yeoksam 2447083. 214306 479. POLYGON ((958823 1942849, 958802 1942857, 958771 1942870, 958771 1942868, 958777 1942838, 958776 ... 4469436. 12.3 6.17
:) 2 Guro 2382391. 336770 304. POLYGON ((946589 1942280, 946594 1942258, 946602 1942228, 946607 1942205, 946608 1942204, 946611 ... 11092136. 12.7 5.72
:) 3 Myeongdong 2325144. 217113 773. POLYGON ((954497 1950831, 954497 1950820, 954502 1950815, 954506 1950812, 954515 1950804, 954511 ... 2809133. 12.3 6.65
:) 4 Jongno 1775598. 129040 203. POLYGON ((955802 1953069, 955799 1953064, 955798 1953061, 955797 1953059, 955795 1953054, 955794 ... 6346426. 11.8 5.31
:) 5 Yeoui 1738367. 151575 180. MULTIPOLYGON (((949183 1949093, 949187 1949091, 949190 1949094, 949206 1949086, 949211 1949085, 9... 8431326. 11.9 5.19
:) 6 Cheongdam 1519280. 155890 212. POLYGON ((960137 1945124, 960129 1945145, 960128 1945146, 960111 1945193, 960109 1945199, 960100 ... 7369461. 12.0 5.35
:) 7 Seongsu 1363683. 155725 113. POLYGON ((961365 1949039, 961352 1949009, 961351 1949007, 961348 1948999, 961341 1948985, 961338 ... 13747639. 12.0 4.73
:) 8 Yeongdeungpo 1296826. 154833 166. GEOMETRYCOLLECTION (POLYGON ((947987 1947559, 948020 1947495, 948038 1947459, 948052 1947433, 948... 9328081. 12.0 5.11
:) 9 Sinchon 1278512. 152826 105. POLYGON ((948402 1949282, 948351 1949283, 947902 1949285, 947774 1949286, 947725 1949286, 947714 ... 14590732. 11.9 4.65
:) 10 Daechi 1217771. 116633 116. POLYGON ((961579 1941234, 961557 1941223, 961545 1941217, 961524 1941214, 961492 1941208, 961476 ... 10038396. 11.7 4.76
:) 11 Gwanghui 1212202. 159615 239. POLYGON ((957266 1950489, 957262 1950476, 957249 1950474, 957245 1950468, 957219 1950435, 957203 ... 6689535. 12.0 5.47
:) 12 Samseong 1198605. 120988 320. POLYGON ((962184 1944598, 962175 1944598, 962166 1944598, 962183 1944573, 962189 1944556, 962190 ... 3779139. 11.7 5.77
:) 13 Munjeong 1019500. 110472 98.5 POLYGON ((970042 1944496, 970040 1944493, 970036 1944485, 970036 1944484, 970036 1944481, 970036 ... 11209772. 11.6 4.59
:) 14 Jamsil 1016366. 123030 102. POLYGON ((966155 1945440, 966223 1945405, 966274 1945378, 966281 1945375, 966285 1945373, 966285 ... 12092064. 11.7 4.62
:) 15 Seocho 988172. 103264 236. POLYGON ((956921 1944435, 956929 1944385, 956930 1944376, 956935 1944348, 956941 1944319, 956948 ... 4377955. 11.5 5.46
dong.sf_commune %>%
filter(total_emp >= quantile(total_emp, THRESHOLD)) %>%
relocate(name, total_inflow, total_emp, density_emp) %>%
arrange(desc(total_emp)):) Simple feature collection with 15 features and 7 fields
:) Geometry type: GEOMETRY
:) Dimension: XY
:) Bounding box: xmin: 944000 ymin: 1940000 xmax: 970000 ymax: 1960000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 15 × 8
:) name total_inflow total_emp density_emp geometry area log_total_emp log_density_emp
:) <chr> <dbl> <dbl> <dbl> <GEOMETRY [m]> [m^2] <dbl> <dbl>
:) 1 Guro 2382391. 336770 304. POLYGON ((946589 1942280, 946594 1942258, 946602 1942228, 946607 1942205, 946608 1942204, 946611 ... 11092136. 12.7 5.72
:) 2 Myeongdong 2325144. 217113 773. POLYGON ((954497 1950831, 954497 1950820, 954502 1950815, 954506 1950812, 954515 1950804, 954511 ... 2809133. 12.3 6.65
:) 3 Yeoksam 2447083. 214306 479. POLYGON ((958823 1942849, 958802 1942857, 958771 1942870, 958771 1942868, 958777 1942838, 958776 ... 4469436. 12.3 6.17
:) 4 Gwanghui 1212202. 159615 239. POLYGON ((957266 1950489, 957262 1950476, 957249 1950474, 957245 1950468, 957219 1950435, 957203 ... 6689535. 12.0 5.47
:) 5 Cheongdam 1519280. 155890 212. POLYGON ((960137 1945124, 960129 1945145, 960128 1945146, 960111 1945193, 960109 1945199, 960100 ... 7369461. 12.0 5.35
:) 6 Seongsu 1363683. 155725 113. POLYGON ((961365 1949039, 961352 1949009, 961351 1949007, 961348 1948999, 961341 1948985, 961338 ... 13747639. 12.0 4.73
:) 7 Yeongdeungpo 1296826. 154833 166. GEOMETRYCOLLECTION (POLYGON ((947987 1947559, 948020 1947495, 948038 1947459, 948052 1947433, 948... 9328081. 12.0 5.11
:) 8 Sinchon 1278512. 152826 105. POLYGON ((948402 1949282, 948351 1949283, 947902 1949285, 947774 1949286, 947725 1949286, 947714 ... 14590732. 11.9 4.65
:) 9 Yeoui 1738367. 151575 180. MULTIPOLYGON (((949183 1949093, 949187 1949091, 949190 1949094, 949206 1949086, 949211 1949085, 9... 8431326. 11.9 5.19
:) 10 Jongno 1775598. 129040 203. POLYGON ((955802 1953069, 955799 1953064, 955798 1953061, 955797 1953059, 955795 1953054, 955794 ... 6346426. 11.8 5.31
:) 11 Jamsil 1016366. 123030 102. POLYGON ((966155 1945440, 966223 1945405, 966274 1945378, 966281 1945375, 966285 1945373, 966285 ... 12092064. 11.7 4.62
:) 12 Yongsin 891353. 122657 90.2 POLYGON ((962199 1951279, 962184 1951259, 962177 1951250, 962176 1951250, 962174 1951246, 962166 ... 13591954. 11.7 4.50
:) 13 Samseong 1198605. 120988 320. POLYGON ((962184 1944598, 962175 1944598, 962166 1944598, 962183 1944573, 962189 1944556, 962190 ... 3779139. 11.7 5.77
:) 14 Daechi 1217771. 116633 116. POLYGON ((961579 1941234, 961557 1941223, 961545 1941217, 961524 1941214, 961492 1941208, 961476 ... 10038396. 11.7 4.76
:) 15 Munjeong 1019500. 110472 98.5 POLYGON ((970042 1944496, 970040 1944493, 970036 1944485, 970036 1944484, 970036 1944481, 970036 ... 11209772. 11.6 4.59
4.2.4 biplot of compositional data
4.2.4.1 set up for compositional data
jongsaja_comm_filtered <- jongsaja_comm %>%
filter(name %in% dong.sf_commune_filtered$name)
jongsaja_comm_filtered:) # A tibble: 15 × 15
:) name MF WR TS AF IC FI RE PT BF PA EC HS RS PS
:) <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
:) 1 Cheongdam 6649 35218 2281 16725 12338 5342 7477 25307 16321 3234 5466 8937 1994 8601
:) 2 Daechi 3914 23571 2861 9480 9407 9381 6669 15237 12570 469 10135 7578 1870 3491
:) 3 Guro 43635 62403 8998 17013 69286 9807 5548 43186 42657 2606 8855 12402 2615 7759
:) 4 Gwanghui 26506 53669 2081 14591 5709 4721 3887 13237 13046 4757 6032 6330 1890 3159
:) 5 Jamsil 2001 29720 3247 14677 18254 6394 4846 8715 6821 3109 7341 8126 4844 4935
:) 6 Jongno 6187 21876 2723 16509 11779 12616 3519 12487 8122 7014 9110 9960 3053 4085
:) 7 Munjeong 4476 23459 9481 9168 7517 1498 5810 18509 7795 3538 5592 8451 1653 3525
:) 8 Myeongdong 1966 40733 11292 19350 18958 46006 5390 22197 23731 15481 2211 3697 1633 4468
:) 9 Samseong 1834 34794 1203 8693 12460 4715 3896 20386 20911 902 4496 2495 1933 2270
:) 10 Seocho 1709 15680 1819 6872 12069 2932 3261 21865 15767 6862 5152 3005 2832 3439
:) 11 Seongsu 24868 30278 6366 12132 11536 3147 5518 15441 13740 3801 11379 8589 2000 6930
:) 12 Sinchon 3176 23458 5568 26346 11100 4449 3111 13052 16398 2507 18571 15650 3183 6257
:) 13 Yeoksam 2321 38715 1799 19338 22309 20714 10619 46336 20988 2375 9748 10761 2240 6043
:) 14 Yeongdeungpo 10281 30375 6301 11452 12847 10315 3343 11238 38711 3229 3876 6262 1842 4761
:) 15 Yeoui 656 13448 1782 10065 15955 47774 4229 22939 18250 5196 1775 3860 1363 4283
jongsaja_comm_filtered_acomp <- jongsaja_comm_filtered %>%
column_to_rownames('name') %>%
as.matrix() %>%
as.table() %>%
compositions::acomp() %>%
as.matrix() %>%
as.data.frame()
jongsaja_comm_filtered_acomp:) MF WR TS AF IC FI RE PT BF PA EC HS RS PS
:) Cheongdam 0.0427 0.226 0.0146 0.1073 0.0791 0.0343 0.0480 0.1623 0.1047 0.02075 0.0351 0.0573 0.01279 0.0552
:) Daechi 0.0336 0.202 0.0245 0.0813 0.0807 0.0804 0.0572 0.1306 0.1078 0.00402 0.0869 0.0650 0.01603 0.0299
:) Guro 0.1296 0.185 0.0267 0.0505 0.2057 0.0291 0.0165 0.1282 0.1267 0.00774 0.0263 0.0368 0.00776 0.0230
:) Gwanghui 0.1661 0.336 0.0130 0.0914 0.0358 0.0296 0.0244 0.0829 0.0817 0.02980 0.0378 0.0397 0.01184 0.0198
:) Jamsil 0.0163 0.242 0.0264 0.1193 0.1484 0.0520 0.0394 0.0708 0.0554 0.02527 0.0597 0.0660 0.03937 0.0401
:) [ reached 'max' / getOption("max.print") -- omitted 10 rows ]
4.2.4.2 CLR transformation
jongsaja_comm_filtered_acomp_clr <- easyCODA::CLR(jongsaja_comm_filtered_acomp)$LR
jongsaja_comm_filtered_acomp_clr:) MF WR TS AF IC FI RE PT BF PA EC HS RS PS
:) Cheongdam -0.685 0.982 -1.755 0.2375 -0.0668 -0.904 -0.568 0.652 0.2130 -1.4057 -0.8809 -0.3892 -1.889 -0.428
:) Daechi -0.916 0.879 -1.229 -0.0315 -0.0392 -0.042 -0.383 0.443 0.2506 -3.0378 0.0353 -0.2554 -1.655 -1.030
:) Guro 0.525 0.883 -1.054 -0.4168 0.9875 -0.968 -1.537 0.515 0.5024 -2.2930 -1.0698 -0.7329 -2.290 -1.202
:) Gwanghui 0.807 1.512 -1.738 0.2099 -0.7285 -0.919 -1.113 0.112 0.0979 -0.9109 -0.6735 -0.6252 -1.834 -1.320
:) Jamsil -1.592 1.106 -1.108 0.4005 0.6186 -0.430 -0.708 -0.121 -0.3658 -1.1515 -0.2923 -0.1907 -0.708 -0.689
:) [ getOption("max.print") 에 도달했습니다 -- 10 행들을 생략합니다 ]
4.2.4.2.1 row weight
it is better to choose density_emp(than total_emp) as the row weight for PCA
row_weight <- dong.sf_commune_filtered %>%
st_drop_geometry() %>%
# select(name, total_emp) %>% # name in alphametic order
# select(total_emp) %>%
# pull(total_emp)
select(name, density_emp) %>% # name in alphametic order
select(density_emp) %>%
pull(density_emp)
# select(name, total_inflow) %>% # name in alphametic order
# select(total_inflow) %>%
# pull(total_inflow)
row_weight <- row_weight / sum(row_weight)
#row_weight <- rep(1/length(dong.sf_commune_filtered$name), length(dong.sf_commune_filtered$name))
row_weight:) [1] 0.0580 0.0319 0.0833 0.0654 0.0279 0.0558 0.0270 0.2120 0.0878 0.0647 0.0311 0.0287 0.1315 0.0455 0.0493
4.2.4.3 PCA
LR_wt <- easyCODA::CLR(jongsaja_comm_filtered_acomp)$LR.wt # different weight among industries
#LR_wt <- rep(1/14, 14)
LR_wt:) MF WR TS AF IC FI RE PT BF PA EC HS RS PS
:) 0.0520 0.2009 0.0287 0.0913 0.0970 0.0762 0.0339 0.1298 0.1139 0.0284 0.0494 0.0506 0.0162 0.0318
jongsaja_comm_filtered_acomp_clr_pca <- easyCODA::PCA(jongsaja_comm_filtered_acomp_clr,
nd = 2,
weight = LR_wt,
#weight = T,
row.wt = row_weight
)
jongsaja_comm_filtered_acomp_clr_pca:)
:) Principal inertias (eigenvalues):
:) 1 2 3 4 5 6 7 8 9 10 11 12 13 14
:) Value 0.127232 0.046676 0.029799 0.017891 0.015934 0.008596 0.007111 0.00477 0.002099 0.001318 0.000441 0.000138 0.000008 0
:) Percentage 48.56% 17.81% 11.37% 6.83% 6.08% 3.28% 2.71% 1.82% 0.8% 0.5% 0.17% 0.05% 0% 0%
:)
:)
:) Rows:
:) Cheongdam Daechi Guro Gwanghui Jamsil Jongno Munjeong Myeongdong Samseong Seocho Seongsu Sinchon Yeoksam Yeongdeungpo Yeoui
:) Mass 0.000 0.000 0.000 0.000 0.0000 0.000 0.0000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
:) ChiDist 0.327 0.427 0.589 0.671 0.4746 0.431 0.6054 0.547 0.425 0.434 0.622 0.565 0.382 0.437 0.705
:) Inertia 0.000 0.000 0.000 0.000 0.0000 0.000 0.0000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
:) Dim. 1 -0.710 -0.532 -1.101 -1.371 -0.2896 -0.276 -1.1821 1.398 -0.268 -0.215 -1.576 -0.803 0.272 -0.356 1.792
:) Dim. 2 -0.259 -0.773 0.290 1.522 -0.0418 0.558 0.0915 0.986 -1.098 -0.855 0.863 -0.333 -1.562 1.175 -0.793
:)
:)
:) Columns:
:) MF WR TS AF IC FI RE PT BF PA EC HS RS PS
:) Mass 0.0520 0.2009 0.0287 0.09128 0.0970 0.0762 0.03391 0.1298 0.1139 0.0284 0.0494 0.0506 0.01616 0.03179
:) ChiDist 1.0526 0.2557 0.7084 0.24421 0.3686 0.9087 0.32285 0.3451 0.3321 0.9125 0.6748 0.4980 0.41360 0.26995
:) Inertia 0.0576 0.0131 0.0144 0.00544 0.0132 0.0630 0.00353 0.0155 0.0126 0.0237 0.0225 0.0125 0.00276 0.00232
:) Dim. 1 -2.5274 -0.2748 0.4874 0.15428 0.2157 2.3958 0.06432 0.1477 0.1914 1.3673 -1.4104 -0.8778 -0.44173 -0.18556
:) Dim. 2 2.1164 0.3799 2.3748 0.20476 -0.5843 0.5163 -0.92507 -1.3161 -0.1290 2.3666 -1.3195 -0.4882 -0.53571 -0.23864
4.2.4.3.1 some stats
a = sum(jongsaja_comm_filtered_acomp_clr_pca$colinertia[1:3])
b = sum(jongsaja_comm_filtered_acomp_clr_pca$colinertia)
a / b:) [1] 0.325
:) [1] 66.4
:) [1] 77.7
a = sum(jongsaja_comm_filtered_acomp_clr_pca$colinertia[1:3])
b = sum(jongsaja_comm_filtered_acomp_clr_pca$colinertia)
a / b:) [1] 0.325
4.2.4.3.2 biplot
tiff("output/biplot.tiff", units="in", width=11, height=8, res=300)
easyCODA::PLOT.PCA(jongsaja_comm_filtered_acomp_clr_pca,
map = "contribution",
dim = c(1, 2),
colarrows = "pink",
cex = c(0.8, 1.2),
main = "",
rescale = 1,
fonts = c(1, 1)) # the default 가로 세로 비율
dev.off():) png
:) 2
4.2.5 clustering
4.2.5.1 PCs
:) [1] "Cheongdam" "Daechi" "Guro" "Gwanghui" "Jamsil" "Jongno" "Munjeong" "Myeongdong" "Samseong" "Seocho" "Seongsu" "Sinchon" "Yeoksam" "Yeongdeungpo" "Yeoui"
4.2.5.2 coordinates
pca_coords <- jongsaja_comm_filtered_acomp_clr_pca$rowcoord
pca_coords <- pca_coords %>%
as.data.frame()
rownames(pca_coords) <- b
pca_coords <- pca_coords %>%
select(1:3)
pca_coords:) V1 V2 V3
:) Cheongdam -0.710 -0.2592 0.36534
:) Daechi -0.532 -0.7735 0.53383
:) Guro -1.101 0.2903 -1.96932
:) Gwanghui -1.371 1.5221 0.43328
:) Jamsil -0.290 -0.0418 1.64406
:) Jongno -0.276 0.5582 1.96510
:) Munjeong -1.182 0.0915 0.88923
:) Myeongdong 1.398 0.9863 -0.15182
:) Samseong -0.268 -1.0984 -1.30969
:) Seocho -0.215 -0.8552 -0.26724
:) Seongsu -1.576 0.8631 0.47667
:) Sinchon -0.803 -0.3334 1.86651
:) Yeoksam 0.272 -1.5625 0.44257
:) Yeongdeungpo -0.356 1.1747 -0.96834
:) Yeoui 1.792 -0.7926 -0.00307
4.2.5.3 WARD
4.2.5.3.1 equal weight among PCs
4.2.5.3.2 different weight by each PC’s importance
eigen <- c(0.127232, 0.046676, 0.029799)
pca_coords <- pca_coords %>%
mutate(V1 = V1 * eigen[1],
V2 = V2 * eigen[2],
V3 = V3 * eigen[3])
pca_coords:) V1 V2 V3
:) Cheongdam -0.0903 -0.01210 0.0108866
:) Daechi -0.0677 -0.03610 0.0159077
:) Guro -0.1401 0.01355 -0.0586839
:) Gwanghui -0.1744 0.07105 0.0129113
:) Jamsil -0.0368 -0.00195 0.0489915
:) Jongno -0.0351 0.02605 0.0585581
:) Munjeong -0.1504 0.00427 0.0264981
:) Myeongdong 0.1779 0.04604 -0.0045241
:) Samseong -0.0341 -0.05127 -0.0390274
:) Seocho -0.0273 -0.03992 -0.0079636
:) Seongsu -0.2005 0.04028 0.0142042
:) Sinchon -0.1022 -0.01556 0.0556202
:) Yeoksam 0.0346 -0.07293 0.0131881
:) Yeongdeungpo -0.0453 0.05483 -0.0288555
:) Yeoui 0.2280 -0.03700 -0.0000915
jongsaja_comm_filtered_acomp_clr_ward <- easyCODA::WARD(pca_coords,
weight = F,
row.wt = row_weight
#row.wt = rep(1/15, 15)
)
jongsaja_comm_filtered_acomp_clr_ward:) Number of objects: 15
4.2.5.4 choose the number of clusters
jongsaja_comm_filtered_acomp_mat <- jongsaja_comm_filtered_acomp %>%
as.matrix() %>%
as.data.frame()
jongsaja_comm_filtered_acomp_mat:) MF WR TS AF IC FI RE PT BF PA EC HS RS PS
:) Cheongdam 0.0427 0.226 0.0146 0.1073 0.0791 0.0343 0.0480 0.1623 0.1047 0.02075 0.0351 0.0573 0.01279 0.0552
:) Daechi 0.0336 0.202 0.0245 0.0813 0.0807 0.0804 0.0572 0.1306 0.1078 0.00402 0.0869 0.0650 0.01603 0.0299
:) Guro 0.1296 0.185 0.0267 0.0505 0.2057 0.0291 0.0165 0.1282 0.1267 0.00774 0.0263 0.0368 0.00776 0.0230
:) Gwanghui 0.1661 0.336 0.0130 0.0914 0.0358 0.0296 0.0244 0.0829 0.0817 0.02980 0.0378 0.0397 0.01184 0.0198
:) Jamsil 0.0163 0.242 0.0264 0.1193 0.1484 0.0520 0.0394 0.0708 0.0554 0.02527 0.0597 0.0660 0.03937 0.0401
:) [ reached 'max' / getOption("max.print") -- omitted 10 rows ]
4.2.6 emp_mode
4.2.6.1 define cluster
emp_mode <- as.data.frame(as.matrix(cutree(jongsaja_comm_filtered_acomp_clr_ward, k = 4))) %>%
rownames_to_column() %>%
as_tibble() %>%
bind_cols(rownames(jongsaja_comm_filtered_acomp_clr)) %>%
select(-rowname)
colnames(emp_mode) <- c("cluster", "name")
emp_mode:) # A tibble: 15 × 2
:) cluster name
:) <int> <chr>
:) 1 1 Cheongdam
:) 2 1 Daechi
:) 3 2 Guro
:) 4 2 Gwanghui
:) 5 1 Jamsil
:) 6 1 Jongno
:) 7 2 Munjeong
:) 8 3 Myeongdong
:) 9 4 Samseong
:) 10 4 Seocho
:) 11 2 Seongsu
:) 12 1 Sinchon
:) 13 4 Yeoksam
:) 14 1 Yeongdeungpo
:) 15 3 Yeoui
4.2.6.3 sorting and naming
emp_mode <- emp_mode %>%
mutate(cluster = as.character(cluster)) %>%
mutate(cluster = if_else(cluster == "1", "Mixed", cluster),
cluster = if_else(cluster == "2", "Manufacturing", cluster),
cluster = if_else(cluster == "3", "Financial", cluster),
cluster = if_else(cluster == "4", "Professional", cluster)) %>%
mutate(cluster = factor(cluster, levels = c("Manufacturing", "Mixed", "Professional", "Financial")))
emp_mode:) # A tibble: 15 × 4
:) cluster name share_of_KBI share_of_CSI
:) <fct> <chr> <dbl> <dbl>
:) 1 Mixed Cheongdam 27.6 38.8
:) 2 Mixed Daechi 29.2 31.3
:) 3 Manufacturing Guro 36.3 25.9
:) 4 Manufacturing Gwanghui 14.8 44.7
:) 5 Mixed Jamsil 27.1 40.1
:) 6 Mixed Jongno 28.6 32.9
:) 7 Manufacturing Munjeong 24.9 32.7
:) 8 Financial Myeongdong 40.1 29.7
:) 9 Professional Samseong 31.0 37.8
:) 10 Professional Seocho 35.7 25.2
:) 11 Manufacturing Seongsu 19.3 31.7
:) 12 Mixed Sinchon 18.7 36.7
:) 13 Professional Yeoksam 41.7 29.9
:) 14 Mixed Yeongdeungpo 22.2 30.1
:) 15 Financial Yeoui 57.2 18.3
4.2.6.4 dong.sf_commune_filtered
dong.sf_commune_filtered <- dong.sf_commune_filtered %>%
left_join(emp_mode, by = "name")
dong.sf_commune_filtered:) Simple feature collection with 15 features and 10 fields
:) Geometry type: GEOMETRY
:) Dimension: XY
:) Bounding box: xmin: 944000 ymin: 1940000 xmax: 970000 ymax: 1960000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 15 × 11
:) name geometry area total_emp density_emp log_total_emp log_density_emp total_inflow cluster share_of_KBI share_of_CSI
:) <chr> <GEOMETRY [m]> [m^2] <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl>
:) 1 Cheongdam POLYGON ((960137 1945124, 960129 1945145, 960128 1945146, 960111 1945193, 960109 1945199, 960100 ... 7369461. 155890 212. 12.0 5.35 1519280. Mixed 27.6 38.8
:) 2 Daechi POLYGON ((961579 1941234, 961557 1941223, 961545 1941217, 961524 1941214, 961492 1941208, 961476 ... 10038396. 116633 116. 11.7 4.76 1217771. Mixed 29.2 31.3
:) 3 Guro POLYGON ((946589 1942280, 946594 1942258, 946602 1942228, 946607 1942205, 946608 1942204, 946611 ... 11092136. 336770 304. 12.7 5.72 2382391. Manufacturing 36.3 25.9
:) 4 Gwanghui POLYGON ((957266 1950489, 957262 1950476, 957249 1950474, 957245 1950468, 957219 1950435, 957203 ... 6689535. 159615 239. 12.0 5.47 1212202. Manufacturing 14.8 44.7
:) 5 Jamsil POLYGON ((966155 1945440, 966223 1945405, 966274 1945378, 966281 1945375, 966285 1945373, 966285 ... 12092064. 123030 102. 11.7 4.62 1016366. Mixed 27.1 40.1
:) 6 Jongno POLYGON ((955802 1953069, 955799 1953064, 955798 1953061, 955797 1953059, 955795 1953054, 955794 ... 6346426. 129040 203. 11.8 5.31 1775598. Mixed 28.6 32.9
:) 7 Munjeong POLYGON ((970042 1944496, 970040 1944493, 970036 1944485, 970036 1944484, 970036 1944481, 970036 ... 11209772. 110472 98.5 11.6 4.59 1019500. Manufacturing 24.9 32.7
:) 8 Myeongdong POLYGON ((954497 1950831, 954497 1950820, 954502 1950815, 954506 1950812, 954515 1950804, 954511 ... 2809133. 217113 773. 12.3 6.65 2325144. Financial 40.1 29.7
:) 9 Samseong POLYGON ((962184 1944598, 962175 1944598, 962166 1944598, 962183 1944573, 962189 1944556, 962190 ... 3779139. 120988 320. 11.7 5.77 1198605. Professional 31.0 37.8
:) 10 Seocho POLYGON ((956921 1944435, 956929 1944385, 956930 1944376, 956935 1944348, 956941 1944319, 956948 ... 4377955. 103264 236. 11.5 5.46 988172. Professional 35.7 25.2
:) 11 Seongsu POLYGON ((961365 1949039, 961352 1949009, 961351 1949007, 961348 1948999, 961341 1948985, 961338 ... 13747639. 155725 113. 12.0 4.73 1363683. Manufacturing 19.3 31.7
:) 12 Sinchon POLYGON ((948402 1949282, 948351 1949283, 947902 1949285, 947774 1949286, 947725 1949286, 947714 ... 14590732. 152826 105. 11.9 4.65 1278512. Mixed 18.7 36.7
:) 13 Yeoksam POLYGON ((958823 1942849, 958802 1942857, 958771 1942870, 958771 1942868, 958777 1942838, 958776 ... 4469436. 214306 479. 12.3 6.17 2447083. Professional 41.7 29.9
:) 14 Yeongdeungpo GEOMETRYCOLLECTION (POLYGON ((947987 1947559, 948020 1947495, 948038 1947459, 948052 1947433, 948... 9328081. 154833 166. 12.0 5.11 1296826. Mixed 22.2 30.1
:) 15 Yeoui MULTIPOLYGON (((949183 1949093, 949187 1949091, 949190 1949094, 949206 1949086, 949211 1949085, 9... 8431326. 151575 180. 11.9 5.19 1738367. Financial 57.2 18.3
dong.sf_commune_filtered %>%
st_drop_geometry() %>%
group_by(cluster) %>%
summarise(mean_density_emp = weighted.mean(density_emp, total_emp),
mean_share_of_KBI = weighted.mean(share_of_KBI, total_emp),
mean_share_of_CSI = weighted.mean(share_of_CSI, total_emp)):) # A tibble: 4 × 4
:) cluster mean_density_emp mean_share_of_KBI mean_share_of_CSI
:) <fct> <dbl> <dbl> <dbl>
:) 1 Manufacturing 221. 26.7 32.0
:) 2 Mixed 153. 25.3 35.0
:) 3 Professional 378. 37.3 31.0
:) 4 Financial 529. 47.1 25.0
4.2.7 mapping
4.2.7.1 figure 1b
4.2.7.2 figure 3b
4.3 radar plot
4.3.1 compositional data
jongsaja_comm_filtered_acomp <- jongsaja_comm_filtered %>%
column_to_rownames('name') %>%
as.matrix() %>%
as.table() %>%
compositions::acomp() %>%
as.matrix() %>%
as.data.frame()
jongsaja_comm_filtered_acomp:) MF WR TS AF IC FI RE PT BF PA EC HS RS PS
:) Cheongdam 0.0427 0.226 0.0146 0.1073 0.0791 0.0343 0.0480 0.1623 0.1047 0.02075 0.0351 0.0573 0.01279 0.0552
:) Daechi 0.0336 0.202 0.0245 0.0813 0.0807 0.0804 0.0572 0.1306 0.1078 0.00402 0.0869 0.0650 0.01603 0.0299
:) Guro 0.1296 0.185 0.0267 0.0505 0.2057 0.0291 0.0165 0.1282 0.1267 0.00774 0.0263 0.0368 0.00776 0.0230
:) Gwanghui 0.1661 0.336 0.0130 0.0914 0.0358 0.0296 0.0244 0.0829 0.0817 0.02980 0.0378 0.0397 0.01184 0.0198
:) Jamsil 0.0163 0.242 0.0264 0.1193 0.1484 0.0520 0.0394 0.0708 0.0554 0.02527 0.0597 0.0660 0.03937 0.0401
:) [ reached 'max' / getOption("max.print") -- omitted 10 rows ]
:) MF WR TS AF IC FI RE PT BF PA EC HS RS PS
:) Cheongdam 4.27 22.6 1.46 10.73 7.91 3.43 4.80 16.23 10.47 2.075 3.51 5.73 1.279 5.52
:) Daechi 3.36 20.2 2.45 8.13 8.07 8.04 5.72 13.06 10.78 0.402 8.69 6.50 1.603 2.99
:) Guro 12.96 18.5 2.67 5.05 20.57 2.91 1.65 12.82 12.67 0.774 2.63 3.68 0.776 2.30
:) Gwanghui 16.61 33.6 1.30 9.14 3.58 2.96 2.44 8.29 8.17 2.980 3.78 3.97 1.184 1.98
:) Jamsil 1.63 24.2 2.64 11.93 14.84 5.20 3.94 7.08 5.54 2.527 5.97 6.60 3.937 4.01
:) [ reached 'max' / getOption("max.print") -- omitted 10 rows ]
:) # A tibble: 15 × 2
:) cluster name
:) <fct> <chr>
:) 1 Mixed Cheongdam
:) 2 Mixed Daechi
:) 3 Manufacturing Guro
:) 4 Manufacturing Gwanghui
:) 5 Mixed Jamsil
:) 6 Mixed Jongno
:) 7 Manufacturing Munjeong
:) 8 Financial Myeongdong
:) 9 Professional Samseong
:) 10 Professional Seocho
:) 11 Manufacturing Seongsu
:) 12 Mixed Sinchon
:) 13 Professional Yeoksam
:) 14 Mixed Yeongdeungpo
:) 15 Financial Yeoui
jongsaja_comm_filtered_acomp_cluster <- jongsaja_comm_filtered_acomp %>%
bind_cols(a) %>%
select(-name) %>%
relocate(EC, PT, IC, PA, FI, RS, RE, HS, WR, MF, BF, PS, TS, AF) %>%
rownames_to_column('name') %>%
relocate(name, cluster)
jongsaja_comm_filtered_acomp_cluster:) name cluster EC PT IC PA FI RS RE HS WR MF BF PS TS AF
:) 1 Cheongdam Mixed 3.51 16.23 7.91 2.075 3.43 1.279 4.80 5.73 22.6 4.27 10.47 5.52 1.46 10.73
:) 2 Daechi Mixed 8.69 13.06 8.07 0.402 8.04 1.603 5.72 6.50 20.2 3.36 10.78 2.99 2.45 8.13
:) 3 Guro Manufacturing 2.63 12.82 20.57 0.774 2.91 0.776 1.65 3.68 18.5 12.96 12.67 2.30 2.67 5.05
:) 4 Gwanghui Manufacturing 3.78 8.29 3.58 2.980 2.96 1.184 2.44 3.97 33.6 16.61 8.17 1.98 1.30 9.14
:) [ reached 'max' / getOption("max.print") -- omitted 11 rows ]
:) [1] 1.02
:) [1] 7.08
:) [1] 3.58
:) [1] 0.402
:) [1] 1.36
:) [1] 0.752
:) [1] 1.65
:) [1] 1.7
:) [1] 8.87
:) [1] 0.433
:) [1] 5.54
:) [1] 1.88
:) [1] 0.839
:) [1] 12.2
:) [1] 21.6
:) [1] 20.6
:) [1] 7.13
:) [1] 31.5
:) [1] 3.94
:) [1] 5.72
:) [1] 10.2
:) [1] 33.6
:) [1] 16.6
:) [1] 25
:) [1] 5.52
:) [1] 8.58
4.3.2 visualization
4.3.2.1 15 centers
entire <- jongsaja_comm_filtered_acomp_cluster %>%
summarise(across(is.numeric, mean)) %>%
rownames_to_column('name')
entire$name <- "average"
entire :) name EC PT IC PA FI RS RE HS WR MF BF PS TS AF
:) 1 average 4.94 13 9.7 2.84 7.62 1.62 3.39 5.06 20.1 5.2 11.4 3.18 2.87 9.13
4.3.2.2 Manufacturing
a <- jongsaja_comm_filtered_acomp_cluster %>%
filter(cluster == "Manufacturing") %>%
select(-cluster)
a:) name EC PT IC PA FI RS RE HS WR MF BF PS TS AF
:) 1 Guro 2.63 12.82 20.57 0.774 2.91 0.776 1.65 3.68 18.5 12.96 12.67 2.30 2.67 5.05
:) 2 Gwanghui 3.78 8.29 3.58 2.980 2.96 1.184 2.44 3.97 33.6 16.61 8.17 1.98 1.30 9.14
:) 3 Munjeong 5.06 16.75 6.80 3.203 1.36 1.496 5.26 7.65 21.2 4.05 7.06 3.19 8.58 8.30
:) 4 Seongsu 7.31 9.92 7.41 2.441 2.02 1.284 3.54 5.52 19.4 15.97 8.82 4.45 4.09 7.79
:) name EC PT IC PA FI RS RE HS WR MF BF PS TS AF
:) 1 mean 4.69 11.9 9.59 2.35 2.31 1.19 3.22 5.2 23.2 12.4 9.18 2.98 4.16 7.57
:) name EC PT IC PA FI RS RE HS WR MF BF PS TS AF
:) 1 mean 4.69 11.9 9.59 2.35 2.31 1.19 3.22 5.20 23.2 12.4 9.18 2.98 4.16 7.57
:) 2 average 4.94 13.0 9.70 2.84 7.62 1.62 3.39 5.06 20.1 5.2 11.39 3.18 2.87 9.13
4.3.2.3 Mixed
:) name EC PT IC PA FI RS RE HS WR MF BF PS TS AF
:) 1 Cheongdam 3.51 16.23 7.91 2.075 3.43 1.28 4.80 5.73 22.6 4.27 10.47 5.52 1.46 10.73
:) 2 Daechi 8.69 13.06 8.07 0.402 8.04 1.60 5.72 6.50 20.2 3.36 10.78 2.99 2.45 8.13
:) 3 Jamsil 5.97 7.08 14.84 2.527 5.20 3.94 3.94 6.60 24.2 1.63 5.54 4.01 2.64 11.93
:) 4 Jongno 7.06 9.68 9.13 5.436 9.78 2.37 2.73 7.72 17.0 4.79 6.29 3.17 2.11 12.79
:) 5 Sinchon 12.15 8.54 7.26 1.640 2.91 2.08 2.04 10.24 15.3 2.08 10.73 4.09 3.64 17.24
:) [ reached 'max' / getOption("max.print") -- omitted 1 rows ]
:) name EC PT IC PA FI RS RE HS WR MF BF PS TS AF
:) 1 mean 6.65 10.3 9.25 2.36 6 2.08 3.56 6.81 19.8 3.79 11.5 3.81 2.73 11.4
:) name EC PT IC PA FI RS RE HS WR MF BF PS TS AF
:) 1 mean 6.65 10.3 9.25 2.36 6.00 2.08 3.56 6.81 19.8 3.79 11.5 3.81 2.73 11.37
:) 2 average 4.94 13.0 9.70 2.84 7.62 1.62 3.39 5.06 20.1 5.20 11.4 3.18 2.87 9.13
4.3.2.4 Professional
a <- jongsaja_comm_filtered_acomp_cluster %>%
filter(cluster == "Professional") %>%
select(-cluster)
a:) name EC PT IC PA FI RS RE HS WR MF BF PS TS AF
:) 1 Samseong 3.72 16.8 10.3 0.746 3.90 1.60 3.22 2.06 28.8 1.52 17.28 1.88 0.994 7.19
:) 2 Seocho 4.99 21.2 11.7 6.645 2.84 2.74 3.16 2.91 15.2 1.65 15.27 3.33 1.762 6.65
:) 3 Yeoksam 4.55 21.6 10.4 1.108 9.67 1.05 4.96 5.02 18.1 1.08 9.79 2.82 0.839 9.02
:) name EC PT IC PA FI RS RE HS WR MF BF PS TS AF
:) 1 mean 4.42 19.9 10.8 2.83 5.47 1.8 3.78 3.33 20.7 1.42 14.1 2.68 1.2 7.62
:) name EC PT IC PA FI RS RE HS WR MF BF PS TS AF
:) 1 mean 4.42 19.9 10.8 2.83 5.47 1.80 3.78 3.33 20.7 1.42 14.1 2.68 1.20 7.62
:) 2 average 4.94 13.0 9.7 2.84 7.62 1.62 3.39 5.06 20.1 5.20 11.4 3.18 2.87 9.13
4.3.2.5 Financial
:) name EC PT IC PA FI RS RE HS WR MF BF PS TS AF
:) 1 Myeongdong 1.02 10.2 8.73 7.13 21.2 0.752 2.48 1.70 18.76 0.906 10.9 2.06 5.20 8.91
:) 2 Yeoui 1.17 15.1 10.53 3.43 31.5 0.899 2.79 2.55 8.87 0.433 12.0 2.83 1.18 6.64
:) name EC PT IC PA FI RS RE HS WR MF BF PS TS AF
:) 1 mean 1.09 12.7 9.63 5.28 26.4 0.826 2.64 2.12 13.8 0.669 11.5 2.44 3.19 7.78
:) name EC PT IC PA FI RS RE HS WR MF BF PS TS AF
:) 1 mean 1.09 12.7 9.63 5.28 26.35 0.826 2.64 2.12 13.8 0.669 11.5 2.44 3.19 7.78
:) 2 average 4.94 13.0 9.70 2.84 7.62 1.616 3.39 5.06 20.1 5.196 11.4 3.18 2.87 9.13
4.3.2.6 output
b %>%
mutate(name = ifelse(name != "mean", "average share across all 15 centers", "average share within each cluster")) %>%
ggradar(grid.min = 10, grid.mid = 20, grid.max = 30,
gridline.min.colour = "grey", gridline.mid.colour = "grey", gridline.max.colour = "grey",
grid.line.width = 0.8, grid.label.size = 8,
centre.y = 0.4,
values.radar = c("10%", "20%", "30%"),
group.colours = c("orange", "blue"),
group.point.size = 1.8, group.line.width = 0.8,
fill = TRUE, fill.alpha = 0.10,
plot.title = "Financial",
#background.circle.colour = "white",
base.size = 13,
axis.line.colour = "grey90",
label.centre.y = F,
plot.legend = T) 5 making the longitudinal dataset
5.1 matching mobility code to shp code
5.1.1 shp code
:) # A tibble: 1,123 × 3
:) adm_nm adm_cd college
:) * <chr> <chr> <dbl>
:) 1 종로구_사직동 1101053 0.808
:) 2 종로구_삼청동 1101054 0.706
:) 3 종로구_부암동 1101055 0.708
:) 4 종로구_평창동 1101056 0.779
:) 5 종로구_무악동 1101057 0.780
:) 6 종로구_교남동 1101058 0.675
:) 7 종로구_가회동 1101060 0.603
:) 8 종로구_종로1.2.3.4가동 1101061 0.682
:) 9 종로구_종로5.6가동 1101063 0.537
:) 10 종로구_이화동 1101064 0.645
:) # ℹ 1,113 more rows
5.1.2 mobility_code
mobility_code <- readxl::read_excel("adm_codes/서울생활이동데이터_행정동코드_20210907.xlsx")
colnames(mobility_code) <- c("sd_cd", "sgg_cd", "adm_cd", "adm_name", "adm_nm")
mobility_code:) # A tibble: 1,152 × 5
:) sd_cd sgg_cd adm_cd adm_name adm_nm
:) <dbl> <dbl> <dbl> <chr> <chr>
:) 1 11000 11010 1101053 사직동 서울특별시 종로구 사직동
:) 2 11000 11010 1101054 삼청동 서울특별시 종로구 삼청동
:) 3 11000 11010 1101055 부암동 서울특별시 종로구 부암동
:) 4 11000 11010 1101056 평창동 서울특별시 종로구 평창동
:) 5 11000 11010 1101057 무악동 서울특별시 종로구 무악동
:) 6 11000 11010 1101058 교남동 서울특별시 종로구 교남동
:) 7 11000 11010 1101060 가회동 서울특별시 종로구 가회동
:) 8 11000 11010 1101061 종로1·2·3·4가동 서울특별시 종로구 종로1·2·3·4가동
:) 9 11000 11010 1101063 종로5·6가동 서울특별시 종로구 종로5·6가동
:) 10 11000 11010 1101064 이화동 서울특별시 종로구 이화동
:) # ℹ 1,142 more rows
mobility_code <- mobility_code %>%
filter(str_sub(adm_cd, 1, 5) != "23320") %>% # 23320이 옹진군임
filter(as.numeric(adm_cd) > 0 & str_length(adm_cd) > 5) %>%
filter(str_sub(adm_cd, 1, 2) %in% c("11", "23", "31")) %>%
select(adm_cd, adm_nm) %>%
mutate(adm_nm = stringr::str_replace_all(adm_nm, "·", ".")) %>%
mutate(adm_nm_sgg = str_split_i(adm_nm, " ", -2),
adm_nm_emd = str_split_i(adm_nm, " ", -1)) %>%
mutate(adm_nm_sgg = ifelse(adm_nm_sgg == "남구", "미추홀구", adm_nm_sgg)) %>% # 시군구 단위 개명: 남구 -> 미추홀구
mutate(adm_nm = str_c(adm_nm_sgg, "_", adm_nm_emd)) %>%
select(-c(adm_nm_sgg, adm_nm_emd))
mobility_code:) # A tibble: 1,131 × 2
:) adm_cd adm_nm
:) <dbl> <chr>
:) 1 1101053 종로구_사직동
:) 2 1101054 종로구_삼청동
:) 3 1101055 종로구_부암동
:) 4 1101056 종로구_평창동
:) 5 1101057 종로구_무악동
:) 6 1101058 종로구_교남동
:) 7 1101060 종로구_가회동
:) 8 1101061 종로구_종로1.2.3.4가동
:) 9 1101063 종로구_종로5.6가동
:) 10 1101064 종로구_이화동
:) # ℹ 1,121 more rows
5.1.3 matching
5.1.3.1 matched
mobility_code_1 <- mobility_code %>%
left_join(dong.sf_resid_tb, by = "adm_nm") %>%
rename(adm_cd_mb = adm_cd.x,
adm_cd_shp = adm_cd.y) %>%
filter(!is.na(college))
mobility_code_1:) # A tibble: 1,087 × 4
:) adm_cd_mb adm_nm adm_cd_shp college
:) <dbl> <chr> <chr> <dbl>
:) 1 1101053 종로구_사직동 1101053 0.808
:) 2 1101054 종로구_삼청동 1101054 0.706
:) 3 1101055 종로구_부암동 1101055 0.708
:) 4 1101056 종로구_평창동 1101056 0.779
:) 5 1101057 종로구_무악동 1101057 0.780
:) 6 1101058 종로구_교남동 1101058 0.675
:) 7 1101060 종로구_가회동 1101060 0.603
:) 8 1101061 종로구_종로1.2.3.4가동 1101061 0.682
:) 9 1101063 종로구_종로5.6가동 1101063 0.537
:) 10 1101064 종로구_이화동 1101064 0.645
:) # ℹ 1,077 more rows
5.1.3.2 unmatched(imputation)
mobility_code_2 <- mobility_code %>%
left_join(dong.sf_resid_tb, by = "adm_nm") %>%
rename(adm_cd_mb = adm_cd.x,
adm_cd_shp = adm_cd.y) %>%
filter(is.na(college))
mobility_code_2:) # A tibble: 44 × 4
:) adm_cd_mb adm_nm adm_cd_shp college
:) <dbl> <chr> <chr> <dbl>
:) 1 2308069 서구_검단2동 <NA> NA
:) 2 2308070 서구_검단3동 <NA> NA
:) 3 2308071 서구_검단4동 <NA> NA
:) 4 2308076 서구_검단1동 <NA> NA
:) 5 2308077 서구_검단5동 <NA> NA
:) 6 3101459 영통구_태장동 <NA> NA
:) 7 3103053 의정부시_의정부3동 <NA> NA
:) 8 3103061 의정부시_가능1동 <NA> NA
:) 9 3105051 부천시_심곡2동 <NA> NA
:) 10 3105052 부천시_심곡1동 <NA> NA
:) # ℹ 34 more rows
# 서구 검단1동, 검단2동, 검단3동, 검단4동, 검단5동 -> 검단동
# 영통구 태장동 -> 망포동
# 의정부시 의정부3동 -> 의정부1동
# 의정부시 가능1동 -> 가능동
# 부천시 심곡2동, 심곡1동, 심국3동, 원미2동, 소사동 -> 심곡동
# 부천시 원미1동, 역곡1동, 역곡2동, 춘의동, 도당동 -> 부천동
# 부천시 중동, 상동 -> 중동
# 부천시 중4동, 약대동, 중1동, 중2동, 중3동 -> 신중동
# 부천시 상2동, 상1동, 상3동 -> 상동
# 부천시 심곡본동, 심곡본1동, 송내1동, 송내2동 -> 대산동
# 부천시 소사본동, 소사본3동 -> 소사본동
# 부천시 괴안동, 범박동, 역곡3동 -> 범안동
# 부천시 성각동, 고강본동, 고강1동 -> 성곡동
# 부천시 오정동, 원종1동, 원종2동, 신흥동 -> 오정동
# 나머지 신도동, 퇴계원면, 동백동, 영덕동, 광남동은 버림
# 부천시 참고: https://wehagothelp.zendesk.com/hc/ko/articles/360000329042-%EA%B2%BD%EA%B8%B0%EB%8F%84-%EB%B6%80%EC%B2%9C%EC%8B%9C-%ED%96%89%EC%A0%95%EA%B8%B0%EA%B4%80-%EC%BD%94%EB%93%9C-%EB%B0%8F-%EA%B4%80%ED%95%A0%EA%B5%AC%EC%97%AD-%EB%B2%95%EC%A0%95%EB%8F%99-%EB%B3%80%EA%B2%BD%EC%9C%BC%EB%A1%9C-%EC%9D%B8%ED%95%9C-%EC%97%85%EB%8D%B0%EC%9D%B4%ED%8A%B8-%EC%95%88%EB%82%B4
mobility_code_2 <- mobility_code_2 %>%
mutate(adm_nm_temp = str_split_i(adm_nm, "_", 2)) %>%
mutate(adm_nm_temp = ifelse(adm_nm_temp %in% c("검단1동", "검단2동", "검단3동", "검단4동", "검단5동"), "검단동",
ifelse(adm_nm_temp %in% "태장동", "망포1동",
ifelse(adm_nm_temp %in% "의정부3동", "의정부1동", adm_nm_temp)))) %>%
mutate(adm_nm_temp = ifelse(adm_nm_temp %in% c("가능1동"), "가능동",
ifelse(adm_nm_temp %in% c("심곡2동", "심곡1동", "심곡3동", "원미2동", "소사동"), "심곡동",
ifelse(adm_nm_temp %in% c("원미1동", "역곡1동", "역곡2동", "춘의동", "도당동"), "부천동", adm_nm_temp)))) %>%
mutate(adm_nm_temp = ifelse(adm_nm_temp %in% c("중동", "상동"), "중동",
ifelse(adm_nm_temp %in% c("중4동", "약대동", "중1동", "중2동", "중3동"), "신중동",
ifelse(adm_nm_temp %in% c("상2동", "상1동", "상3동"), "상동", adm_nm_temp)))) %>%
mutate(adm_nm_temp = ifelse(adm_nm_temp %in% c("심곡본동", "심곡본1동", "송내1동", "송내2동"), "대산동",
ifelse(adm_nm_temp %in% c("소사본동", "소사본3동"), "소사본동",
ifelse(adm_nm_temp %in% c("괴안동", "범박동", "역곡3동"), "범안동", adm_nm_temp)))) %>%
mutate(adm_nm_temp = ifelse(adm_nm_temp %in% c("성각동", "고강본동", "고강1동"), "성곡동",
ifelse(adm_nm_temp %in% c("오정동", "원종1동", "원종2동", "신흥동"), "오정동", adm_nm_temp))) %>%
mutate(adm_nm = str_c(str_split_i(adm_nm, "_", 1),
"_",
adm_nm_temp)) %>%
filter(!adm_nm_temp %in% c("신도동", "퇴계원면", "동백동", "영덕동", "광남동")) %>%
select(-adm_nm_temp)
mobility_code_2:) # A tibble: 39 × 4
:) adm_cd_mb adm_nm adm_cd_shp college
:) <dbl> <chr> <chr> <dbl>
:) 1 2308069 서구_검단동 <NA> NA
:) 2 2308070 서구_검단동 <NA> NA
:) 3 2308071 서구_검단동 <NA> NA
:) 4 2308076 서구_검단동 <NA> NA
:) 5 2308077 서구_검단동 <NA> NA
:) 6 3101459 영통구_망포1동 <NA> NA
:) 7 3103053 의정부시_의정부1동 <NA> NA
:) 8 3103061 의정부시_가능동 <NA> NA
:) 9 3105051 부천시_심곡동 <NA> NA
:) 10 3105052 부천시_심곡동 <NA> NA
:) # ℹ 29 more rows
:) adm_cd_mb adm_nm adm_cd_shp college
:) 0 0 39 39
mobility_code_2 <- mobility_code_2 %>%
select(adm_cd_mb, adm_nm) %>%
left_join(dong.sf_resid_tb, by = "adm_nm") %>%
rename(adm_cd_shp = adm_cd)
mobility_code_2:) # A tibble: 39 × 4
:) adm_cd_mb adm_nm adm_cd_shp college
:) <dbl> <chr> <chr> <dbl>
:) 1 2308069 서구_검단동 2308080 0.326
:) 2 2308070 서구_검단동 2308080 0.326
:) 3 2308071 서구_검단동 2308080 0.326
:) 4 2308076 서구_검단동 2308080 0.326
:) 5 2308077 서구_검단동 2308080 0.326
:) 6 3101459 영통구_망포1동 3101467 0.653
:) 7 3103053 의정부시_의정부1동 3103069 0.276
:) 8 3103061 의정부시_가능동 3103068 0.253
:) 9 3105051 부천시_심곡동 3105087 0.269
:) 10 3105052 부천시_심곡동 3105087 0.269
:) # ℹ 29 more rows
:) adm_cd_mb adm_nm adm_cd_shp college
:) 0 0 0 0
5.1.3.3 integrate
:) # A tibble: 1,126 × 4
:) adm_cd_mb adm_nm adm_cd_shp college
:) <dbl> <chr> <chr> <dbl>
:) 1 1101053 종로구_사직동 1101053 0.808
:) 2 1101054 종로구_삼청동 1101054 0.706
:) 3 1101055 종로구_부암동 1101055 0.708
:) 4 1101056 종로구_평창동 1101056 0.779
:) 5 1101057 종로구_무악동 1101057 0.780
:) 6 1101058 종로구_교남동 1101058 0.675
:) 7 1101060 종로구_가회동 1101060 0.603
:) 8 1101061 종로구_종로1.2.3.4가동 1101061 0.682
:) 9 1101063 종로구_종로5.6가동 1101063 0.537
:) 10 1101064 종로구_이화동 1101064 0.645
:) # ℹ 1,116 more rows
:) adm_cd_mb adm_nm adm_cd_shp college
:) 0 0 0 0
mobility_code <- mobility_code %>%
mutate(adm_cd_mb = as.character(adm_cd_mb),
adm_cd_shp = as.character(adm_cd_shp))
mobility_code:) # A tibble: 1,126 × 4
:) adm_cd_mb adm_nm adm_cd_shp college
:) <chr> <chr> <chr> <dbl>
:) 1 1101053 종로구_사직동 1101053 0.808
:) 2 1101054 종로구_삼청동 1101054 0.706
:) 3 1101055 종로구_부암동 1101055 0.708
:) 4 1101056 종로구_평창동 1101056 0.779
:) 5 1101057 종로구_무악동 1101057 0.780
:) 6 1101058 종로구_교남동 1101058 0.675
:) 7 1101060 종로구_가회동 1101060 0.603
:) 8 1101061 종로구_종로1.2.3.4가동 1101061 0.682
:) 9 1101063 종로구_종로5.6가동 1101063 0.537
:) 10 1101064 종로구_이화동 1101064 0.645
:) # ℹ 1,116 more rows
:) # A tibble: 77 × 4
:) adm_cd_mb adm_nm adm_cd_shp college
:) <chr> <chr> <chr> <dbl>
:) 1 2303052 미추홀구_숭의2동 2309052 0.184
:) 2 2303054 미추홀구_숭의4동 2309054 0.245
:) 3 2303056 미추홀구_용현2동 2309056 0.340
:) 4 2303057 미추홀구_용현3동 2309057 0.242
:) 5 2303059 미추홀구_용현5동 2309059 0.340
:) 6 2303060 미추홀구_학익1동 2309060 0.364
:) 7 2303061 미추홀구_학익2동 2309061 0.361
:) 8 2303062 미추홀구_도화1동 2309062 0.245
:) 9 2303065 미추홀구_주안1동 2309065 0.272
:) 10 2303066 미추홀구_주안2동 2309066 0.216
:) # ℹ 67 more rows
5.2 stacking flow data
FI <- paste0("//192.168.0.22/Public/JaegeonLee_Personal/data_seoulmobility/",
list.files(path = "//192.168.0.22/Public/JaegeonLee_Personal/data_seoulmobility/")){r eval = FALSE} for (I in 1:length(FI)) { print(I)
fi <- as.character(FI[I])
print(fi)
fi_within <- paste0(fi, "/", list.files(path = fi))
#print(fi_within)
for (i in 8:10) {
print(fi_within[i])
temp <- read_csv(as.character(fi_within[i]), locale=locale('ko',encoding='euc-kr'))
colnames(temp) <- c("DEPRT_YM", "DAYOFWEEK", "DEPRT_HOUR", "DEPRTP", "DESTNTN", "GENDER", "AGE_GR", "FLOW_TYPE", "TRVL_TIME", "LIFE_FLPOP")
temp <- temp %>%
mutate_at(1:9, as.character) %>%
mutate(LIFE_FLPOP = replace(LIFE_FLPOP, LIFE_FLPOP=="*", "2")) %>%
mutate(LIFE_FLPOP = as.numeric(LIFE_FLPOP)) %>%
filter(str_sub(DEPRTP, 1, 2) %in% c("11", "23", "31"),
str_sub(DESTNTN, 1, 2) == "11") %>%
filter(FLOW_TYPE == "HW") %>%
filter(DAYOFWEEK %in% c("월", "화", "수", "목", "금")) %>%
filter(as.numeric(AGE_GR) >= 20 & as.numeric(AGE_GR) < 60)
if (i == 8){
data <- plyr::rbind.fill(temp)
rm(temp) # 부산물 삭제
} else {
data <- plyr::rbind.fill(data, temp)
rm(temp) #부산물 삭제
}
#print(data)
}
# edit flow data
data <- data %>%
group_by(DEPRTP, DESTNTN, GENDER, AGE_GR, TRVL_TIME) %>%
summarise(LIFE_FLPOP = sum(LIFE_FLPOP))
# save
# indexing
if (I < 10){
I <- as.character(paste0("0", I))
} else {
I <- as.character(I)
}
# to files
data %>%
write_excel_csv(paste0("//192.168.0.22/Public/JaegeonLee_Personal/data_seoulmobility_abbreviated_20230528/",
I,
".csv"))
rm(data)
}
5.3 stacking time-varying predictors
5.3.1 pilot
temp <- read_csv("//192.168.0.22/Public/JaegeonLee_Personal/data_seoulmobility_abbreviated_20230528/01.csv",
locale=locale('ko', encoding = 'UTF-8'))
temp:) # A tibble: 1,559,180 × 6
:) DEPRTP DESTNTN GENDER AGE_GR TRVL_TIME LIFE_FLPOP
:) <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
:) 1 1101053 1101053 F 20 10 4
:) 2 1101053 1101053 F 25 10 299.
:) 3 1101053 1101053 F 25 20 26.5
:) 4 1101053 1101053 F 30 10 209.
:) 5 1101053 1101053 F 35 10 438.
:) 6 1101053 1101053 F 35 20 3.01
:) 7 1101053 1101053 F 35 40 6.01
:) 8 1101053 1101053 F 40 10 143.
:) 9 1101053 1101053 F 40 20 23.7
:) 10 1101053 1101053 F 45 10 164.
:) # ℹ 1,559,170 more rows
temp <- temp %>%
mutate(DEPRTP = as.character(DEPRTP),
DESTNTN = as.character(DESTNTN)) %>%
group_by(DEPRTP, DESTNTN, TRVL_TIME, LIFE_FLPOP) %>%
summarise(LIFE_FLPOP = sum(LIFE_FLPOP)) %>%
ungroup()
temp:) # A tibble: 1,541,030 × 4
:) DEPRTP DESTNTN TRVL_TIME LIFE_FLPOP
:) <chr> <chr> <dbl> <dbl>
:) 1 1101053 1101053 10 4
:) 2 1101053 1101053 10 44.2
:) 3 1101053 1101053 10 84.3
:) 4 1101053 1101053 10 87.0
:) 5 1101053 1101053 10 87.6
:) 6 1101053 1101053 10 89.8
:) 7 1101053 1101053 10 109.
:) 8 1101053 1101053 10 143.
:) 9 1101053 1101053 10 164.
:) 10 1101053 1101053 10 209.
:) # ℹ 1,541,020 more rows
# DEPRTP
temp <- temp %>%
left_join(mobility_code, by = c("DEPRTP" = "adm_cd_mb")) %>%
rename(H_adm_nm = adm_nm,
H_adm_cd_shp = adm_cd_shp) %>%
select(H_adm_nm, H_adm_cd_shp, DESTNTN, TRVL_TIME, LIFE_FLPOP, college) %>%
# DESTNTN
left_join(membership_info_eng, by = c("DESTNTN" = "adm_cd")) %>%
rename(W_commune_nm = name,
W_adm_cd_shp = DESTNTN) %>%
select(H_adm_nm, H_adm_cd_shp, W_commune_nm, W_adm_cd_shp, TRVL_TIME, LIFE_FLPOP, college) %>%
# DESTNTN aggregate
group_by(H_adm_nm, H_adm_cd_shp, W_commune_nm) %>%
summarise(TRVL_TIME = weighted.mean(TRVL_TIME, LIFE_FLPOP),
LIFE_FLPOP = sum(LIFE_FLPOP),
college = mean(college)) %>%
ungroup()
#%>%
# mutate(Time = str_c(I))
temp:) # A tibble: 48,487 × 6
:) H_adm_nm H_adm_cd_shp W_commune_nm TRVL_TIME LIFE_FLPOP college
:) <chr> <chr> <chr> <dbl> <dbl> <dbl>
:) 1 가평군_가평읍 3137011 Anam 47.0 6.56 0.360
:) 2 가평군_가평읍 3137011 Bangbae 47.8 45.4 0.360
:) 3 가평군_가평읍 3137011 Banghak 30 18.1 0.360
:) 4 가평군_가평읍 3137011 Bangi 37.3 123. 0.360
:) 5 가평군_가평읍 3137011 Changshin 34.5 135. 0.360
:) 6 가평군_가평읍 3137011 Cheongdam 47.5 134. 0.360
:) 7 가평군_가평읍 3137011 Daechi 34.9 123. 0.360
:) 8 가평군_가평읍 3137011 Daehak 60 2 0.360
:) 9 가평군_가평읍 3137011 Gil 20 24.1 0.360
:) 10 가평군_가평읍 3137011 Gongneung 35.5 101. 0.360
:) # ℹ 48,477 more rows
5.3.2 top destinations
5.3.3 import
5.3.4 binding rows
for (I in 1:length(FI)) { # in 1:length(FI) 임을 잊지말것 print(I)
temp <- read_csv(as.character(FI[I]), locale=locale('ko', encoding = 'UTF-8'))
if (I == 1){
temp <- temp %>%
mutate(DEPRTP = as.character(DEPRTP),
DESTNTN = as.character(DESTNTN)) %>%
group_by(DEPRTP, DESTNTN, TRVL_TIME, LIFE_FLPOP) %>%
summarise(LIFE_FLPOP = sum(LIFE_FLPOP)) %>%
ungroup()
# DEPRTP
temp <- temp %>%
left_join(mobility_code, by = c("DEPRTP" = "adm_cd_mb")) %>%
rename(H_adm_nm = adm_nm,
H_adm_cd_shp = adm_cd_shp) %>%
select(H_adm_nm, H_adm_cd_shp, DESTNTN, TRVL_TIME, LIFE_FLPOP, college) %>%
# DESTNTN
left_join(membership_info_eng, by = c("DESTNTN" = "adm_cd")) %>%
rename(W_commune_nm = name,
W_adm_cd_shp = DESTNTN) %>%
select(H_adm_nm, H_adm_cd_shp, W_commune_nm, W_adm_cd_shp, TRVL_TIME, LIFE_FLPOP, college) %>%
# DESTNTN aggregate
group_by(H_adm_nm, H_adm_cd_shp, W_commune_nm) %>%
summarise(TRVL_TIME = weighted.mean(TRVL_TIME, LIFE_FLPOP),
LIFE_FLPOP = sum(LIFE_FLPOP),
college = mean(college)) %>%
ungroup() %>%
mutate(Time = str_c(I))
head(temp)
data <- temp
rm(temp)
print("here")
}
else {
temp <- temp %>%
mutate(DEPRTP = as.character(DEPRTP),
DESTNTN = as.character(DESTNTN)) %>%
group_by(DEPRTP, DESTNTN, TRVL_TIME, LIFE_FLPOP) %>%
summarise(LIFE_FLPOP = sum(LIFE_FLPOP)) %>%
ungroup()
# DEPRTP
temp <- temp %>%
left_join(mobility_code, by = c("DEPRTP" = "adm_cd_mb")) %>%
rename(H_adm_nm = adm_nm,
H_adm_cd_shp = adm_cd_shp) %>%
select(H_adm_nm, H_adm_cd_shp, DESTNTN, TRVL_TIME, LIFE_FLPOP, college) %>%
# DESTNTN
left_join(membership_info_eng, by = c("DESTNTN" = "adm_cd")) %>%
rename(W_commune_nm = name,
W_adm_cd_shp = DESTNTN) %>%
select(H_adm_nm, H_adm_cd_shp, W_commune_nm, W_adm_cd_shp, TRVL_TIME, LIFE_FLPOP, college) %>%
# DESTNTN aggregate
group_by(H_adm_nm, H_adm_cd_shp, W_commune_nm) %>%
summarise(TRVL_TIME = weighted.mean(TRVL_TIME, LIFE_FLPOP),
LIFE_FLPOP = sum(LIFE_FLPOP),
college = mean(college)) %>%
ungroup() %>%
mutate(Time = str_c(I))
head(temp)
data <- data %>%
dplyr::bind_rows(temp)
rm(temp)
print("here")
}
}
5.4 joining time-varying predictors
5.4.1 set up
#data <- data %>%
# mutate(H_adm_nm = as.character(H_adm_nm),
# H_adm_cd_shp = as.character(H_adm_cd_shp)) %>%
# rename(time_distance = TRVL_TIME,
# flow = LIFE_FLPOP) %>%
# mutate(hw_link = str_c(H_adm_nm, " -> ", W_commune_nm)) %>%
# mutate(Time_ts = case_when(Time == "1" ~ "2020-01-01",
# Time == "2" ~ "2020-02-01",
# Time == "3" ~ "2020-03-01",
# Time == "4" ~ "2020-04-01",
# Time == "5" ~ "2020-05-01",
# Time == "6" ~ "2020-06-01",
# Time == "7" ~ "2020-07-01",
# Time == "8" ~ "2020-08-01",
# Time == "9" ~ "2020-09-01",
# Time == "10" ~ "2020-10-01",
# Time == "11" ~ "2020-11-01",
# Time == "12" ~ "2020-12-01",
# Time == "13" ~ "2021-01-01",
# Time == "14" ~ "2021-02-01",
# Time == "15" ~ "2021-03-01",
# Time == "16" ~ "2021-04-01",
# Time == "17" ~ "2021-05-01",
# Time == "18" ~ "2021-06-01",
# Time == "19" ~ "2021-07-01",
# Time == "20" ~ "2021-08-01",
# Time == "21" ~ "2021-09-01",
# Time == "22" ~ "2021-10-01",
# Time == "23" ~ "2021-11-01",
# Time == "24" ~ "2021-12-01",
# Time == "25" ~ "2022-01-01",
# Time == "26" ~ "2022-02-01",
# Time == "27" ~ "2022-03-01",
# Time == "28" ~ "2022-04-01",
# Time == "29" ~ "2022-05-01",
# Time == "30" ~ "2022-06-01",
# )) %>%
# mutate(Time_ts = as.character(Time_ts)) %>%
# #arrange(home, work) %>%
# mutate(Time_ts = tsibble::yearmonth(as.Date(Time_ts))) %>%
# relocate(Time_ts, H_adm_nm, W_commune_nm, time_distance, college, flow)
#data5.4.2 covid cases
#covid <- readxl::read_xlsx("data_covid/seoul_covid.xlsx")
#
#covid <- covid %>%
# mutate(ymd = str_sub(ymd, 1, 7)) %>%
# group_by(ymd) %>%
# summarise(new = sum(new)) %>%
# mutate(ymd = ifelse(ymd %in% c("20.02.0", "20.02.1", "20.02.2"), "2020.01", ymd)) %>%
# group_by(ymd) %>%
# summarise(new = sum(new)) %>%
# mutate(ymd = str_c(str_sub(ymd, 1, 4),
# "-",
# str_sub(ymd, 6, 7),
# "-01")) %>%
# mutate(ymd = yearmonth(as.Date(ymd))) %>%
# slice(1:30)
#
#data <- data %>%
# left_join(covid, by = c("Time_ts" = "ymd"))
#data5.5 filter out flows not heading to employment centers
5.5.1 set up
data <- read_csv("data_longitudinal/flow_30months_concatenated_20230528.csv") %>%
mutate(H_adm_nm = as.character(H_adm_nm),
H_adm_cd_shp = as.character(H_adm_cd_shp)) %>%
mutate(W_commune_nm = ifelse(W_commune_nm == "Myeong", "Myeongdong", W_commune_nm))
data:) # A tibble: 1,439,415 × 10
:) Time_ts H_adm_nm W_commune_nm time_distance college flow H_adm_cd_shp Time hw_link new
:) <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
:) 1 2020 1 가평군_가평읍 Anam 47.0 0.360 6.56 3137011 1 가평군_가평읍 -> Anam 0
:) 2 2020 1 가평군_가평읍 Bangbae 47.8 0.360 45.4 3137011 1 가평군_가평읍 -> Bangbae 0
:) 3 2020 1 가평군_가평읍 Banghak 30 0.360 18.1 3137011 1 가평군_가평읍 -> Banghak 0
:) 4 2020 1 가평군_가평읍 Bangi 37.3 0.360 123. 3137011 1 가평군_가평읍 -> Bangi 0
:) 5 2020 1 가평군_가평읍 Changshin 34.5 0.360 135. 3137011 1 가평군_가평읍 -> Changshin 0
:) 6 2020 1 가평군_가평읍 Cheongdam 47.5 0.360 134. 3137011 1 가평군_가평읍 -> Cheongdam 0
:) 7 2020 1 가평군_가평읍 Daechi 34.9 0.360 123. 3137011 1 가평군_가평읍 -> Daechi 0
:) 8 2020 1 가평군_가평읍 Daehak 60 0.360 2 3137011 1 가평군_가평읍 -> Daehak 0
:) 9 2020 1 가평군_가평읍 Gil 20 0.360 24.1 3137011 1 가평군_가평읍 -> Gil 0
:) 10 2020 1 가평군_가평읍 Gongneung 35.5 0.360 101. 3137011 1 가평군_가평읍 -> Gongneung 0
:) # ℹ 1,439,405 more rows
employment_centers <- dong.sf_commune_filtered %>%
st_drop_geometry() %>%
pull(name)
employment_centers:) [1] "Cheongdam" "Daechi" "Guro" "Gwanghui" "Jamsil" "Jongno" "Munjeong" "Myeongdong" "Samseong" "Seocho" "Seongsu" "Sinchon" "Yeoksam" "Yeongdeungpo" "Yeoui"
:) # A tibble: 444,926 × 10
:) Time_ts H_adm_nm W_commune_nm time_distance college flow H_adm_cd_shp Time hw_link new
:) <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
:) 1 2020 1 가평군_가평읍 Cheongdam 47.5 0.360 134. 3137011 1 가평군_가평읍 -> Cheongdam 0
:) 2 2020 1 가평군_가평읍 Daechi 34.9 0.360 123. 3137011 1 가평군_가평읍 -> Daechi 0
:) 3 2020 1 가평군_가평읍 Guro 42.8 0.360 34.7 3137011 1 가평군_가평읍 -> Guro 0
:) 4 2020 1 가평군_가평읍 Gwanghui 40 0.360 131. 3137011 1 가평군_가평읍 -> Gwanghui 0
:) 5 2020 1 가평군_가평읍 Jamsil 46.6 0.360 9.13 3137011 1 가평군_가평읍 -> Jamsil 0
:) 6 2020 1 가평군_가평읍 Jongno 37.1 0.360 118. 3137011 1 가평군_가평읍 -> Jongno 0
:) 7 2020 1 가평군_가평읍 Munjeong 71.6 0.360 12.6 3137011 1 가평군_가평읍 -> Munjeong 0
:) 8 2020 1 가평군_가평읍 Myeongdong 54.3 0.360 61.1 3137011 1 가평군_가평읍 -> Myeong 0
:) 9 2020 1 가평군_가평읍 Samseong 61.9 0.360 46.2 3137011 1 가평군_가평읍 -> Samseong 0
:) 10 2020 1 가평군_가평읍 Seocho 43.3 0.360 100. 3137011 1 가평군_가평읍 -> Seocho 0
:) # ℹ 444,916 more rows
5.6 filter out void flows
5.6.1 NA
420 = 30(months) * 14(employment centers). 즉, 출발지역 하나만 놓친 정도로 준수함.
:) Time_ts H_adm_nm W_commune_nm time_distance college flow H_adm_cd_shp Time hw_link new
:) 0 450 0 0 450 0 450 0 450 0
:) # A tibble: 444,476 × 10
:) Time_ts H_adm_nm W_commune_nm time_distance college flow H_adm_cd_shp Time hw_link new
:) <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
:) 1 2020 1 가평군_가평읍 Cheongdam 47.5 0.360 134. 3137011 1 가평군_가평읍 -> Cheongdam 0
:) 2 2020 1 가평군_가평읍 Daechi 34.9 0.360 123. 3137011 1 가평군_가평읍 -> Daechi 0
:) 3 2020 1 가평군_가평읍 Guro 42.8 0.360 34.7 3137011 1 가평군_가평읍 -> Guro 0
:) 4 2020 1 가평군_가평읍 Gwanghui 40 0.360 131. 3137011 1 가평군_가평읍 -> Gwanghui 0
:) 5 2020 1 가평군_가평읍 Jamsil 46.6 0.360 9.13 3137011 1 가평군_가평읍 -> Jamsil 0
:) 6 2020 1 가평군_가평읍 Jongno 37.1 0.360 118. 3137011 1 가평군_가평읍 -> Jongno 0
:) 7 2020 1 가평군_가평읍 Munjeong 71.6 0.360 12.6 3137011 1 가평군_가평읍 -> Munjeong 0
:) 8 2020 1 가평군_가평읍 Myeongdong 54.3 0.360 61.1 3137011 1 가평군_가평읍 -> Myeong 0
:) 9 2020 1 가평군_가평읍 Samseong 61.9 0.360 46.2 3137011 1 가평군_가평읍 -> Samseong 0
:) 10 2020 1 가평군_가평읍 Seocho 43.3 0.360 100. 3137011 1 가평군_가평읍 -> Seocho 0
:) # ℹ 444,466 more rows
5.6.2 missing values
hw_link 별 count가 30이 아니면, 그 hw_link 계정 자체를 지움 20,149 -> 16,390
hw_link_survived <- data %>%
group_by(hw_link) %>%
count() %>%
filter(n == 30) %>%
pull(hw_link)
hw_link_survived:) [1] "가평군_가평읍 -> Cheongdam" "가평군_가평읍 -> Guro" "가평군_가평읍 -> Gwanghui" "가평군_가평읍 -> Jongno" "가평군_가평읍 -> Myeong" "가평군_가평읍 -> Seocho" "가평군_가평읍 -> Seongsu" "가평군_가평읍 -> Yeoksam" "가평군_가평읍 -> Yeoui" "가평군_상면 -> Samseong" "가평군_상면 -> Seocho" "가평군_설악면 -> Gwanghui" "가평군_설악면 -> Jamsil" "가평군_설악면 -> Jongno"
:) [15] "가평군_설악면 -> Samseong" "가평군_설악면 -> Seocho" "가평군_설악면 -> Yeoksam" "가평군_설악면 -> Yeongdeungpo" "가평군_청평면 -> Cheongdam" "가평군_청평면 -> Gwanghui" "가평군_청평면 -> Jamsil" "가평군_청평면 -> Jongno" "가평군_청평면 -> Seocho" "가평군_청평면 -> Seongsu" "가평군_청평면 -> Yeoksam" "강남구_개포1동 -> Cheongdam" "강남구_개포1동 -> Daechi" "강남구_개포1동 -> Guro"
:) [29] "강남구_개포1동 -> Gwanghui" "강남구_개포1동 -> Jamsil" "강남구_개포1동 -> Jongno" "강남구_개포1동 -> Munjeong" "강남구_개포1동 -> Myeong" "강남구_개포1동 -> Samseong" "강남구_개포1동 -> Seocho" "강남구_개포1동 -> Seongsu" "강남구_개포1동 -> Sinchon" "강남구_개포1동 -> Yeoksam" "강남구_개포1동 -> Yeongdeungpo" "강남구_개포1동 -> Yeoui" "강남구_개포2동 -> Cheongdam" "강남구_개포2동 -> Daechi"
:) [43] "강남구_개포2동 -> Guro" "강남구_개포2동 -> Gwanghui" "강남구_개포2동 -> Jamsil" "강남구_개포2동 -> Jongno" "강남구_개포2동 -> Munjeong" "강남구_개포2동 -> Myeong" "강남구_개포2동 -> Samseong" "강남구_개포2동 -> Seocho" "강남구_개포2동 -> Seongsu" "강남구_개포2동 -> Sinchon" "강남구_개포2동 -> Yeoksam" "강남구_개포2동 -> Yeongdeungpo" "강남구_개포2동 -> Yeoui" "강남구_개포4동 -> Cheongdam"
:) [57] "강남구_개포4동 -> Daechi" "강남구_개포4동 -> Guro" "강남구_개포4동 -> Gwanghui" "강남구_개포4동 -> Jamsil" "강남구_개포4동 -> Jongno" "강남구_개포4동 -> Munjeong" "강남구_개포4동 -> Myeong" "강남구_개포4동 -> Samseong" "강남구_개포4동 -> Seocho" "강남구_개포4동 -> Seongsu" "강남구_개포4동 -> Sinchon" "강남구_개포4동 -> Yeoksam" "강남구_개포4동 -> Yeongdeungpo" "강남구_개포4동 -> Yeoui"
:) [71] "강남구_논현1동 -> Cheongdam" "강남구_논현1동 -> Daechi" "강남구_논현1동 -> Guro" "강남구_논현1동 -> Gwanghui" "강남구_논현1동 -> Jamsil"
:) [ reached getOption("max.print") -- omitted 13388 entries ]
:) # A tibble: 403,890 × 10
:) Time_ts H_adm_nm W_commune_nm time_distance college flow H_adm_cd_shp Time hw_link new
:) <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
:) 1 2020 1 가평군_가평읍 Cheongdam 47.5 0.360 134. 3137011 1 가평군_가평읍 -> Cheongdam 0
:) 2 2020 1 가평군_가평읍 Guro 42.8 0.360 34.7 3137011 1 가평군_가평읍 -> Guro 0
:) 3 2020 1 가평군_가평읍 Gwanghui 40 0.360 131. 3137011 1 가평군_가평읍 -> Gwanghui 0
:) 4 2020 1 가평군_가평읍 Jongno 37.1 0.360 118. 3137011 1 가평군_가평읍 -> Jongno 0
:) 5 2020 1 가평군_가평읍 Myeongdong 54.3 0.360 61.1 3137011 1 가평군_가평읍 -> Myeong 0
:) 6 2020 1 가평군_가평읍 Seocho 43.3 0.360 100. 3137011 1 가평군_가평읍 -> Seocho 0
:) 7 2020 1 가평군_가평읍 Seongsu 30.1 0.360 49.0 3137011 1 가평군_가평읍 -> Seongsu 0
:) 8 2020 1 가평군_가평읍 Yeoksam 43.3 0.360 20.7 3137011 1 가평군_가평읍 -> Yeoksam 0
:) 9 2020 1 가평군_가평읍 Yeoui 36.9 0.360 103. 3137011 1 가평군_가평읍 -> Yeoui 0
:) 10 2020 1 가평군_상면 Samseong 40 0.397 71.7 3137033 1 가평군_상면 -> Samseong 0
:) # ℹ 403,880 more rows
5.7 filter out thin or unstable links
:) # A tibble: 13,463 × 2
:) hw_link flow_mean
:) <chr> <dbl>
:) 1 가평군_가평읍 -> Cheongdam 74.0
:) 2 가평군_가평읍 -> Guro 58.0
:) 3 가평군_가평읍 -> Gwanghui 104.
:) 4 가평군_가평읍 -> Jongno 61.5
:) 5 가평군_가평읍 -> Myeong 77.7
:) 6 가평군_가평읍 -> Seocho 61.2
:) 7 가평군_가평읍 -> Seongsu 103.
:) 8 가평군_가평읍 -> Yeoksam 60.8
:) 9 가평군_가평읍 -> Yeoui 44.3
:) 10 가평군_상면 -> Samseong 52.7
:) # ℹ 13,453 more rows
p1 <- a %>%
ggplot() +
geom_histogram(aes(flow_mean))
p2 <- a %>%
filter(flow_mean < quantile(flow_mean, 0.25)) %>%
ggplot() +
geom_histogram(aes(flow_mean))
ggpubr::ggarrange(p1, p2, ncol = 2)data %>%
group_by(hw_link) %>%
mutate(flow_mean = mean(flow)) %>%
ungroup() %>% # 중요함
filter(flow_mean == quantile(flow_mean, 0.25)) %>%
summarise(twentyfifthpercentile = mean(flow)):) # A tibble: 1 × 1
:) twentyfifthpercentile
:) <dbl>
:) 1 319.
# outlier 제거 위해 좀 더 엄정한 기준 적용
# 30개월동안의 평균치가 일정 기준 이하인 것을 찾도록 함
# threshold = 0.1, 0.2, 0.25, 0.3 다양하게 시도해보기
data_filtered <- data %>%
group_by(hw_link) %>%
mutate(flow_mean = mean(flow)) %>%
ungroup() %>% # 중요함
filter(flow_mean >= quantile(flow_mean, 0.25))
data_filtered:) # A tibble: 302,940 × 11
:) Time_ts H_adm_nm W_commune_nm time_distance college flow H_adm_cd_shp Time hw_link new flow_mean
:) <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl> <dbl>
:) 1 2020 1 강남구_개포1동 Cheongdam 17.0 0.889 1791. 1123068 1 강남구_개포1동 -> Cheongdam 0 1583.
:) 2 2020 1 강남구_개포1동 Daechi 14.1 0.889 3265. 1123068 1 강남구_개포1동 -> Daechi 0 3098.
:) 3 2020 1 강남구_개포1동 Guro 31.4 0.889 414. 1123068 1 강남구_개포1동 -> Guro 0 322.
:) 4 2020 1 강남구_개포1동 Gwanghui 23.8 0.889 344. 1123068 1 강남구_개포1동 -> Gwanghui 0 356.
:) 5 2020 1 강남구_개포1동 Jamsil 17.0 0.889 947. 1123068 1 강남구_개포1동 -> Jamsil 0 615.
:) 6 2020 1 강남구_개포1동 Jongno 24.7 0.889 1209. 1123068 1 강남구_개포1동 -> Jongno 0 755.
:) 7 2020 1 강남구_개포1동 Munjeong 22.4 0.889 451. 1123068 1 강남구_개포1동 -> Munjeong 0 549.
:) 8 2020 1 강남구_개포1동 Myeongdong 24.1 0.889 870. 1123068 1 강남구_개포1동 -> Myeong 0 965.
:) 9 2020 1 강남구_개포1동 Samseong 15.6 0.889 1763. 1123068 1 강남구_개포1동 -> Samseong 0 1255.
:) 10 2020 1 강남구_개포1동 Seocho 15.6 0.889 837. 1123068 1 강남구_개포1동 -> Seocho 0 791.
:) # ℹ 302,930 more rows
:) # A tibble: 302,940 × 11
:) # Groups: hw_link [10,098]
:) Time_ts H_adm_nm W_commune_nm time_distance college flow H_adm_cd_shp Time hw_link new flow_mean
:) <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl> <dbl>
:) 1 2020 1 강남구_개포1동 Cheongdam 17.0 0.889 1791. 1123068 1 강남구_개포1동 -> Cheongdam 0 1583.
:) 2 2020 1 강남구_개포1동 Daechi 14.1 0.889 3265. 1123068 1 강남구_개포1동 -> Daechi 0 3098.
:) 3 2020 1 강남구_개포1동 Guro 31.4 0.889 414. 1123068 1 강남구_개포1동 -> Guro 0 322.
:) 4 2020 1 강남구_개포1동 Gwanghui 23.8 0.889 344. 1123068 1 강남구_개포1동 -> Gwanghui 0 356.
:) 5 2020 1 강남구_개포1동 Jamsil 17.0 0.889 947. 1123068 1 강남구_개포1동 -> Jamsil 0 615.
:) 6 2020 1 강남구_개포1동 Jongno 24.7 0.889 1209. 1123068 1 강남구_개포1동 -> Jongno 0 755.
:) 7 2020 1 강남구_개포1동 Munjeong 22.4 0.889 451. 1123068 1 강남구_개포1동 -> Munjeong 0 549.
:) 8 2020 1 강남구_개포1동 Myeongdong 24.1 0.889 870. 1123068 1 강남구_개포1동 -> Myeong 0 965.
:) 9 2020 1 강남구_개포1동 Samseong 15.6 0.889 1763. 1123068 1 강남구_개포1동 -> Samseong 0 1255.
:) 10 2020 1 강남구_개포1동 Seocho 15.6 0.889 837. 1123068 1 강남구_개포1동 -> Seocho 0 791.
:) # ℹ 302,930 more rows
유효한 outflow가 있는 출발지역
resid_effective <- data_filtered %>% distinct(H_adm_nm) %>% as.vector()
resid_effective <- resid_effective$H_adm_nm
resid_effective:) [1] "강남구_개포1동" "강남구_개포2동" "강남구_개포4동" "강남구_논현1동" "강남구_논현2동" "강남구_대치1동" "강남구_대치2동" "강남구_대치4동" "강남구_도곡1동" "강남구_도곡2동" "강남구_삼성1동" "강남구_삼성2동" "강남구_세곡동" "강남구_수서동" "강남구_신사동" "강남구_압구정동" "강남구_역삼1동" "강남구_역삼2동" "강남구_일원1동" "강남구_일원2동" "강남구_일원본동" "강남구_청담동" "강동구_강일동" "강동구_고덕1동" "강동구_고덕2동" "강동구_길동"
:) [27] "강동구_둔촌2동" "강동구_명일1동" "강동구_명일2동" "강동구_상일동" "강동구_성내1동" "강동구_성내2동" "강동구_성내3동" "강동구_암사1동" "강동구_암사2동" "강동구_암사3동" "강동구_천호1동" "강동구_천호2동" "강동구_천호3동" "강북구_미아동" "강북구_번1동" "강북구_번2동" "강북구_번3동" "강북구_삼각산동" "강북구_삼양동" "강북구_송중동" "강북구_송천동" "강북구_수유1동" "강북구_수유2동" "강북구_수유3동" "강북구_우이동" "강북구_인수동"
:) [53] "강서구_가양1동" "강서구_가양2동" "강서구_가양3동" "강서구_공항동" "강서구_등촌1동" "강서구_등촌2동" "강서구_등촌3동" "강서구_발산1동" "강서구_방화1동" "강서구_방화2동" "강서구_방화3동" "강서구_염창동" "강서구_우장산동" "강서구_화곡1동" "강서구_화곡2동" "강서구_화곡3동" "강서구_화곡4동" "강서구_화곡6동" "강서구_화곡8동" "강서구_화곡본동" "계양구_계산1동" "계양구_계산2동" "계양구_계산3동"
:) [ reached getOption("max.print") -- omitted 838 entries ]
유효한 outflow가 하나도 없는 출발지역
:) # A tibble: 210 × 1
:) adm_nm
:) <chr>
:) 1 강동구_둔촌1동
:) 2 중구_연안동
:) 3 중구_도원동
:) 4 중구_율목동
:) 5 중구_동인천동
:) 6 중구_송월동
:) 7 중구_용유동
:) 8 동구_만석동
:) 9 동구_화수1.화평동
:) 10 동구_화수2동
:) # ℹ 200 more rows
유효한 outflow가 많은 순서대로
:) # A tibble: 913 × 2
:) H_adm_nm count
:) <chr> <int>
:) 1 강남구_개포2동 450
:) 2 강남구_논현1동 450
:) 3 강남구_논현2동 450
:) 4 강남구_대치1동 450
:) 5 강남구_대치2동 450
:) 6 강남구_대치4동 450
:) 7 강남구_도곡1동 450
:) 8 강남구_도곡2동 450
:) 9 강남구_삼성2동 450
:) 10 강남구_세곡동 450
:) # ℹ 903 more rows
5.8 mapping origins that are effective
5.8.1 figure 4a
dong.sf_resid_filtered <- dong.sf_resid %>%
filter(adm_nm %in% resid_effective)
dong.sf_resid_filtered:) Simple feature collection with 913 features and 4 fields
:) Geometry type: GEOMETRY
:) Dimension: XY
:) Bounding box: xmin: 901000 ymin: 1890000 xmax: 1010000 ymax: 1990000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 913 × 5
:) adm_nm adm_cd geometry college num_of_neighbors
:) * <chr> <chr> <MULTIPOLYGON [m]> <dbl> <int>
:) 1 종로구_사직동 1101053 (((953554 1953336, 953555 1953320, 953556 1953307, 953557 1953295, 953558 1953281, 9... 0.808 7
:) 2 종로구_삼청동 1101054 (((953844 1955492, 953859 1955490, 953902 1955493, 953912 1955493, 953916 1955492, 9... 0.706 7
:) 3 종로구_부암동 1101055 (((952490 1956549, 952498 1956533, 952501 1956525, 952501 1956524, 952492 1956515, 9... 0.708 7
:) 4 종로구_평창동 1101056 (((953684 1959210, 953665 1959132, 953647 1959057, 953651 1959043, 953672 1958971, 9... 0.779 9
:) 5 종로구_무악동 1101057 (((952298 1953540, 952325 1953508, 952329 1953500, 952338 1953484, 952339 1953482, 9... 0.780 7
:) 6 종로구_교남동 1101058 (((952572 1953259, 952573 1953256, 952575 1953250, 952577 1953241, 952580 1953234, 9... 0.675 6
:) 7 종로구_가회동 1101060 (((954895 1954615, 954888 1954592, 954865 1954592, 954856 1954592, 954838 1954563, 9... 0.603 4
:) 8 종로구_종로1.2.3.4가동 1101061 (((954918 1954372, 954926 1954362, 954932 1954355, 954937 1954352, 954949 1954346, 9... 0.682 10
:) 9 종로구_종로5.6가동 1101063 (((956607 1953150, 956607 1953148, 956607 1953146, 956607 1953144, 956607 1953139, 9... 0.537 8
:) 10 종로구_이화동 1101064 (((956366 1954112, 956372 1954108, 956379 1954108, 956379 1954108, 956408 1954108, 9... 0.645 6
:) # ℹ 903 more rows
5.9 define college quartile
5.9.1 college quartile (SMA)
dong.sf_resid_filtered <- dong.sf_resid_filtered %>%
mutate(col_qrt = case_when(
quantile(college, 0.75) < college ~ "high(Q4)",
quantile(college, 0.50) < college & college <= quantile(college, 0.75) ~ "middle-high(Q3)",
quantile(college, 0.25) < college & college <= quantile(college, 0.50) ~ "middle-low(Q2)",
college <= quantile(college, 0.25) ~ "low(Q1)")) %>%
mutate(col_qrt = factor(col_qrt, levels = c("low(Q1)", "middle-low(Q2)", "middle-high(Q3)", "high(Q4)")))
dong.sf_resid_filtered:) Simple feature collection with 913 features and 5 fields
:) Geometry type: GEOMETRY
:) Dimension: XY
:) Bounding box: xmin: 901000 ymin: 1890000 xmax: 1010000 ymax: 1990000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 913 × 6
:) adm_nm adm_cd geometry college num_of_neighbors col_qrt
:) * <chr> <chr> <MULTIPOLYGON [m]> <dbl> <int> <fct>
:) 1 종로구_사직동 1101053 (((953554 1953336, 953555 1953320, 953556 1953307, 953557 1953295, 953558 1953281, 9... 0.808 7 high(Q4)
:) 2 종로구_삼청동 1101054 (((953844 1955492, 953859 1955490, 953902 1955493, 953912 1955493, 953916 1955492, 9... 0.706 7 high(Q4)
:) 3 종로구_부암동 1101055 (((952490 1956549, 952498 1956533, 952501 1956525, 952501 1956524, 952492 1956515, 9... 0.708 7 high(Q4)
:) 4 종로구_평창동 1101056 (((953684 1959210, 953665 1959132, 953647 1959057, 953651 1959043, 953672 1958971, 9... 0.779 9 high(Q4)
:) 5 종로구_무악동 1101057 (((952298 1953540, 952325 1953508, 952329 1953500, 952338 1953484, 952339 1953482, 9... 0.780 7 high(Q4)
:) 6 종로구_교남동 1101058 (((952572 1953259, 952573 1953256, 952575 1953250, 952577 1953241, 952580 1953234, 9... 0.675 6 high(Q4)
:) 7 종로구_가회동 1101060 (((954895 1954615, 954888 1954592, 954865 1954592, 954856 1954592, 954838 1954563, 9... 0.603 4 middle-high(Q3)
:) 8 종로구_종로1.2.3.4가동 1101061 (((954918 1954372, 954926 1954362, 954932 1954355, 954937 1954352, 954949 1954346, 9... 0.682 10 high(Q4)
:) 9 종로구_종로5.6가동 1101063 (((956607 1953150, 956607 1953148, 956607 1953146, 956607 1953144, 956607 1953139, 9... 0.537 8 middle-high(Q3)
:) 10 종로구_이화동 1101064 (((956366 1954112, 956372 1954108, 956379 1954108, 956379 1954108, 956408 1954108, 9... 0.645 6 high(Q4)
:) # ℹ 903 more rows
:) Min. 1st Qu. Median Mean 3rd Qu. Max.
:) 0.144 0.352 0.482 0.497 0.626 0.946
:) Min. 1st Qu. Median Mean 3rd Qu. Max.
:) 14.4 35.2 48.2 49.7 62.6 94.6
5.9.2 college quartile (within Seoul)
dong.sf_resid_filtered_withinseoul <- dong.sf_resid_filtered %>%
st_drop_geometry() %>%
select(adm_cd, adm_nm, college) %>% # kosis 아닌 shp 기준의 코드만 남겨나
filter(str_sub(adm_cd, 1, 2) == "11") %>%
mutate(col_qrt_withinSeoul = case_when(
quantile(college, 0.75) < college ~ "high(Q4)",
quantile(college, 0.50) < college & college <= quantile(college, 0.75) ~ "middle-high(Q3)",
quantile(college, 0.25) < college & college <= quantile(college, 0.50) ~ "middle-low(Q2)",
college <= quantile(college, 0.25) ~ "low(Q1)")) %>%
mutate(col_qrt_withinSeoul = factor(col_qrt_withinSeoul, levels = c("low(Q1)", "middle-low(Q2)", "middle-high(Q3)", "high(Q4)")))
dong.sf_resid_filtered_withinseoul:) # A tibble: 423 × 4
:) adm_cd adm_nm college col_qrt_withinSeoul
:) <chr> <chr> <dbl> <fct>
:) 1 1101053 종로구_사직동 0.808 high(Q4)
:) 2 1101054 종로구_삼청동 0.706 high(Q4)
:) 3 1101055 종로구_부암동 0.708 high(Q4)
:) 4 1101056 종로구_평창동 0.779 high(Q4)
:) 5 1101057 종로구_무악동 0.780 high(Q4)
:) 6 1101058 종로구_교남동 0.675 middle-high(Q3)
:) 7 1101060 종로구_가회동 0.603 middle-high(Q3)
:) 8 1101061 종로구_종로1.2.3.4가동 0.682 middle-high(Q3)
:) 9 1101063 종로구_종로5.6가동 0.537 middle-low(Q2)
:) 10 1101064 종로구_이화동 0.645 middle-high(Q3)
:) # ℹ 413 more rows
5.9.3 college quartile (outof Seoul)
dong.sf_resid_filtered_outofseoul <- dong.sf_resid_filtered %>%
st_drop_geometry() %>%
select(adm_cd, adm_nm, college) %>% # kosis 아닌 shp 기준의 코드만 남겨나
filter(str_sub(adm_cd, 1, 2) != "11") %>%
mutate(col_qrt_outofSeoul = case_when(
quantile(college, 0.75) < college ~ "high(Q4)",
quantile(college, 0.50) < college & college <= quantile(college, 0.75) ~ "middle-high(Q3)",
quantile(college, 0.25) < college & college <= quantile(college, 0.50) ~ "middle-low(Q2)",
college <= quantile(college, 0.25) ~ "low(Q1)")) %>%
mutate(col_qrt_outofSeoul = factor(col_qrt_outofSeoul, levels = c("low(Q1)", "middle-low(Q2)", "middle-high(Q3)", "high(Q4)")))
dong.sf_resid_filtered_outofseoul:) # A tibble: 490 × 4
:) adm_cd adm_nm college col_qrt_outofSeoul
:) <chr> <chr> <dbl> <fct>
:) 1 2301053 중구_신포동 0.341 middle-low(Q2)
:) 2 2301054 중구_신흥동 0.323 middle-low(Q2)
:) 3 2301060 중구_북성동 0.284 low(Q1)
:) 4 2301064 중구_운서동 0.489 middle-high(Q3)
:) 5 2301065 중구_영종동 0.438 middle-high(Q3)
:) 6 2301066 중구_영종1동 0.520 middle-high(Q3)
:) 7 2302055 동구_송현1.2동 0.330 middle-low(Q2)
:) 8 2302060 동구_송림3.5동 0.297 low(Q1)
:) 9 2302063 동구_송림6동 0.273 low(Q1)
:) 10 2304051 연수구_옥련1동 0.330 middle-low(Q2)
:) # ℹ 480 more rows
5.9.4 integrate
dong.sf_resid_filtered <- dong.sf_resid_filtered %>%
left_join(dong.sf_resid_filtered_withinseoul, by = c("adm_cd", "adm_nm", "college")) %>%
left_join(dong.sf_resid_filtered_outofseoul, by = c("adm_cd", "adm_nm", "college"))
dong.sf_resid_filtered:) Simple feature collection with 913 features and 7 fields
:) Geometry type: GEOMETRY
:) Dimension: XY
:) Bounding box: xmin: 901000 ymin: 1890000 xmax: 1010000 ymax: 1990000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 913 × 8
:) adm_nm adm_cd geometry college num_of_neighbors col_qrt col_qrt_withinSeoul col_qrt_outofSeoul
:) <chr> <chr> <MULTIPOLYGON [m]> <dbl> <int> <fct> <fct> <fct>
:) 1 종로구_사직동 1101053 (((953554 1953336, 953555 1953320, 953556 1953307, 953557 1953295, 953558 1953281, 9... 0.808 7 high(Q4) high(Q4) <NA>
:) 2 종로구_삼청동 1101054 (((953844 1955492, 953859 1955490, 953902 1955493, 953912 1955493, 953916 1955492, 9... 0.706 7 high(Q4) high(Q4) <NA>
:) 3 종로구_부암동 1101055 (((952490 1956549, 952498 1956533, 952501 1956525, 952501 1956524, 952492 1956515, 9... 0.708 7 high(Q4) high(Q4) <NA>
:) 4 종로구_평창동 1101056 (((953684 1959210, 953665 1959132, 953647 1959057, 953651 1959043, 953672 1958971, 9... 0.779 9 high(Q4) high(Q4) <NA>
:) 5 종로구_무악동 1101057 (((952298 1953540, 952325 1953508, 952329 1953500, 952338 1953484, 952339 1953482, 9... 0.780 7 high(Q4) high(Q4) <NA>
:) 6 종로구_교남동 1101058 (((952572 1953259, 952573 1953256, 952575 1953250, 952577 1953241, 952580 1953234, 9... 0.675 6 high(Q4) middle-high(Q3) <NA>
:) 7 종로구_가회동 1101060 (((954895 1954615, 954888 1954592, 954865 1954592, 954856 1954592, 954838 1954563, 9... 0.603 4 middle-high(Q3) middle-high(Q3) <NA>
:) 8 종로구_종로1.2.3.4가동 1101061 (((954918 1954372, 954926 1954362, 954932 1954355, 954937 1954352, 954949 1954346, 9... 0.682 10 high(Q4) middle-high(Q3) <NA>
:) 9 종로구_종로5.6가동 1101063 (((956607 1953150, 956607 1953148, 956607 1953146, 956607 1953144, 956607 1953139, 9... 0.537 8 middle-high(Q3) middle-low(Q2) <NA>
:) 10 종로구_이화동 1101064 (((956366 1954112, 956372 1954108, 956379 1954108, 956379 1954108, 956408 1954108, 9... 0.645 6 high(Q4) middle-high(Q3) <NA>
:) # ℹ 903 more rows
5.10 joining time-invariant predictors
resid_feature <- dong.sf_resid_filtered %>%
st_drop_geometry() %>%
select(adm_nm, adm_cd, college, col_qrt, col_qrt_withinSeoul, col_qrt_outofSeoul)emp_feature <- dong.sf_commune_filtered %>%
st_drop_geometry() %>%
select(name, cluster, total_emp, density_emp, cluster, share_of_KBI, share_of_CSI)data_filtered <- data_filtered %>%
select(-college) %>%
left_join(resid_feature, by = c("H_adm_nm" = "adm_nm")) %>%
left_join(emp_feature, by = c("W_commune_nm" = "name")) %>%
relocate(Time_ts, H_adm_nm, W_commune_nm, flow, time_distance, cluster, college)
data_filtered :) # A tibble: 302,940 × 20
:) Time_ts H_adm_nm W_commune_nm flow time_distance cluster college H_adm_cd_shp Time hw_link new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI
:) <chr> <chr> <chr> <dbl> <dbl> <fct> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <chr> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
:) 1 2020 1 강남구_개포1동 Cheongdam 1791. 17.0 Mixed 0.889 1123068 1 강남구_개포1동 -> Cheongdam 0 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 2 2020 1 강남구_개포1동 Daechi 3265. 14.1 Mixed 0.889 1123068 1 강남구_개포1동 -> Daechi 0 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3
:) 3 2020 1 강남구_개포1동 Guro 414. 31.4 Manufacturing 0.889 1123068 1 강남구_개포1동 -> Guro 0 322. 1123068 high(Q4) high(Q4) <NA> 336770 304. 36.3 25.9
:) 4 2020 1 강남구_개포1동 Gwanghui 344. 23.8 Manufacturing 0.889 1123068 1 강남구_개포1동 -> Gwanghui 0 356. 1123068 high(Q4) high(Q4) <NA> 159615 239. 14.8 44.7
:) 5 2020 1 강남구_개포1동 Jamsil 947. 17.0 Mixed 0.889 1123068 1 강남구_개포1동 -> Jamsil 0 615. 1123068 high(Q4) high(Q4) <NA> 123030 102. 27.1 40.1
:) 6 2020 1 강남구_개포1동 Jongno 1209. 24.7 Mixed 0.889 1123068 1 강남구_개포1동 -> Jongno 0 755. 1123068 high(Q4) high(Q4) <NA> 129040 203. 28.6 32.9
:) 7 2020 1 강남구_개포1동 Munjeong 451. 22.4 Manufacturing 0.889 1123068 1 강남구_개포1동 -> Munjeong 0 549. 1123068 high(Q4) high(Q4) <NA> 110472 98.5 24.9 32.7
:) 8 2020 1 강남구_개포1동 Myeongdong 870. 24.1 Financial 0.889 1123068 1 강남구_개포1동 -> Myeong 0 965. 1123068 high(Q4) high(Q4) <NA> 217113 773. 40.1 29.7
:) 9 2020 1 강남구_개포1동 Samseong 1763. 15.6 Professional 0.889 1123068 1 강남구_개포1동 -> Samseong 0 1255. 1123068 high(Q4) high(Q4) <NA> 120988 320. 31.0 37.8
:) 10 2020 1 강남구_개포1동 Seocho 837. 15.6 Professional 0.889 1123068 1 강남구_개포1동 -> Seocho 0 791. 1123068 high(Q4) high(Q4) <NA> 103264 236. 35.7 25.2
:) # ℹ 302,930 more rows
6 EDA
6.1 import data
:) # A tibble: 302,940 × 20
:) Time_ts H_adm_nm W_commune_nm flow time_distance cluster college H_adm_cd_shp Time hw_link new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI
:) <chr> <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
:) 1 2020 1 강남구_개포1동 Cheongdam 1791. 17.0 Mixed 0.889 1123068 1 강남구_개포1동 -> Cheongdam 0 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 2 2020 1 강남구_개포1동 Daechi 3265. 14.1 Mixed 0.889 1123068 1 강남구_개포1동 -> Daechi 0 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3
:) 3 2020 1 강남구_개포1동 Guro 414. 31.4 Manufacturing 0.889 1123068 1 강남구_개포1동 -> Guro 0 322. 1123068 high(Q4) high(Q4) <NA> 336770 304. 36.3 25.9
:) 4 2020 1 강남구_개포1동 Gwanghui 344. 23.8 Manufacturing 0.889 1123068 1 강남구_개포1동 -> Gwanghui 0 356. 1123068 high(Q4) high(Q4) <NA> 159615 239. 14.8 44.7
:) 5 2020 1 강남구_개포1동 Jamsil 947. 17.0 Mixed 0.889 1123068 1 강남구_개포1동 -> Jamsil 0 615. 1123068 high(Q4) high(Q4) <NA> 123030 102. 27.1 40.1
:) 6 2020 1 강남구_개포1동 Jongno 1209. 24.7 Mixed 0.889 1123068 1 강남구_개포1동 -> Jongno 0 755. 1123068 high(Q4) high(Q4) <NA> 129040 203. 28.6 32.9
:) 7 2020 1 강남구_개포1동 Munjeong 451. 22.4 Manufacturing 0.889 1123068 1 강남구_개포1동 -> Munjeong 0 549. 1123068 high(Q4) high(Q4) <NA> 110472 98.5 24.9 32.7
:) 8 2020 1 강남구_개포1동 Myeongdong 870. 24.1 Financial 0.889 1123068 1 강남구_개포1동 -> Myeong 0 965. 1123068 high(Q4) high(Q4) <NA> 217113 773. 40.1 29.7
:) 9 2020 1 강남구_개포1동 Samseong 1763. 15.6 Professional 0.889 1123068 1 강남구_개포1동 -> Samseong 0 1255. 1123068 high(Q4) high(Q4) <NA> 120988 320. 31.0 37.8
:) 10 2020 1 강남구_개포1동 Seocho 837. 15.6 Professional 0.889 1123068 1 강남구_개포1동 -> Seocho 0 791. 1123068 high(Q4) high(Q4) <NA> 103264 236. 35.7 25.2
:) # ℹ 302,930 more rows
data_filtered_ts <- data_filtered %>%
mutate(Time_ts = yearmonth(Time_ts)) %>%
tsibble::as_tsibble(index = Time_ts, key = hw_link) %>%
mutate(cluster = factor(cluster, levels = c("Manufacturing", "Mixed", "Professional", "Financial"))) %>%
mutate(col_qrt = factor(col_qrt,
levels = c("low(Q1)", "middle-low(Q2)", "middle-high(Q3)", "high(Q4)")),
col_qrt_withinSeoul = factor(col_qrt_withinSeoul,
levels = c("low(Q1)", "middle-low(Q2)", "middle-high(Q3)", "high(Q4)"))) %>%
mutate(H_adm_cd_shp = as.character(H_adm_cd_shp))
data_filtered_ts :) # A tsibble: 302,940 x 20 [1M]
:) # Key: hw_link [10,098]
:) Time_ts H_adm_nm W_commune_nm flow time_distance cluster college H_adm_cd_shp Time hw_link new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI
:) <mth> <chr> <chr> <dbl> <dbl> <fct> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl>
:) 1 2020 1 강남구_개포1동 Cheongdam 1791. 17.0 Mixed 0.889 1123068 1 강남구_개포1동 -> Cheongdam 0 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 2 2020 2 강남구_개포1동 Cheongdam 1494. 16.6 Mixed 0.889 1123068 2 강남구_개포1동 -> Cheongdam 0 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 3 2020 3 강남구_개포1동 Cheongdam 1835. 16.6 Mixed 0.889 1123068 3 강남구_개포1동 -> Cheongdam 0 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 4 2020 4 강남구_개포1동 Cheongdam 1894. 16.6 Mixed 0.889 1123068 4 강남구_개포1동 -> Cheongdam 7 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 5 2020 5 강남구_개포1동 Cheongdam 1946. 16.9 Mixed 0.889 1123068 5 강남구_개포1동 -> Cheongdam 228 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 6 2020 6 강남구_개포1동 Cheongdam 2073. 18.2 Mixed 0.889 1123068 6 강남구_개포1동 -> Cheongdam 451 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 7 2020 7 강남구_개포1동 Cheongdam 2017. 16.8 Mixed 0.889 1123068 7 강남구_개포1동 -> Cheongdam 288 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 8 2020 8 강남구_개포1동 Cheongdam 1581. 17.2 Mixed 0.889 1123068 8 강남구_개포1동 -> Cheongdam 2267 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 9 2020 9 강남구_개포1동 Cheongdam 1639. 15.6 Mixed 0.889 1123068 9 강남구_개포1동 -> Cheongdam 1424 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 10 2020 10 강남구_개포1동 Cheongdam 1669. 16.2 Mixed 0.889 1123068 10 강남구_개포1동 -> Cheongdam 719 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) # ℹ 302,930 more rows
:) Time_ts H_adm_nm W_commune_nm flow time_distance cluster college H_adm_cd_shp Time hw_link new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI
:) 0 0 0 0 0 0 0 0 0 0 0 0 0 0 125370 177570 0 0 0 0
6.2 impute time-invariant time distance
data_filtered_ts <- data_filtered_ts %>%
group_by(hw_link) %>%
mutate(time_distance = weighted.mean(time_distance, flow)) %>%
rename(distance = time_distance) %>%
ungroup()
data_filtered_ts:) # A tsibble: 302,940 x 20 [1M]
:) # Key: hw_link [10,098]
:) Time_ts H_adm_nm W_commune_nm flow distance cluster college H_adm_cd_shp Time hw_link new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI
:) <mth> <chr> <chr> <dbl> <dbl> <fct> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl>
:) 1 2020 1 강남구_개포1동 Cheongdam 1791. 17.7 Mixed 0.889 1123068 1 강남구_개포1동 -> Cheongdam 0 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 2 2020 2 강남구_개포1동 Cheongdam 1494. 17.7 Mixed 0.889 1123068 2 강남구_개포1동 -> Cheongdam 0 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 3 2020 3 강남구_개포1동 Cheongdam 1835. 17.7 Mixed 0.889 1123068 3 강남구_개포1동 -> Cheongdam 0 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 4 2020 4 강남구_개포1동 Cheongdam 1894. 17.7 Mixed 0.889 1123068 4 강남구_개포1동 -> Cheongdam 7 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 5 2020 5 강남구_개포1동 Cheongdam 1946. 17.7 Mixed 0.889 1123068 5 강남구_개포1동 -> Cheongdam 228 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 6 2020 6 강남구_개포1동 Cheongdam 2073. 17.7 Mixed 0.889 1123068 6 강남구_개포1동 -> Cheongdam 451 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 7 2020 7 강남구_개포1동 Cheongdam 2017. 17.7 Mixed 0.889 1123068 7 강남구_개포1동 -> Cheongdam 288 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 8 2020 8 강남구_개포1동 Cheongdam 1581. 17.7 Mixed 0.889 1123068 8 강남구_개포1동 -> Cheongdam 2267 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 9 2020 9 강남구_개포1동 Cheongdam 1639. 17.7 Mixed 0.889 1123068 9 강남구_개포1동 -> Cheongdam 1424 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) 10 2020 10 강남구_개포1동 Cheongdam 1669. 17.7 Mixed 0.889 1123068 10 강남구_개포1동 -> Cheongdam 719 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8
:) # ℹ 302,930 more rows
6.3 bizdays
business_calendar_wo_holidays <- bizdays::create.calendar('my_calendar',
weekdays = c('saturday','sunday'))
business_calendar_wo_holidays:) my_calendar financial calendar
:) 0 holidays
:) 2 weekdays (saturday, sunday)
:) range from 1970-01-01 to 2071-01-01
:) bizdays arguments adjust
:) from: none
:) to: none
business_calendar_w_holidays <- bizdays::create.calendar('my_calendar',
weekdays = c('saturday','sunday'),
holidays = c(
"2020-01-01", "2020-01-24", "2020-01-25", "2020-01-26", "2020-03-01",
"2020-04-30", "2020-05-05", "2020-06-06", "2020-08-15", "2020-08-17",
"2020-09-30", "2020-10-01", "2020-10-02", "2020-10-03", "2020-10-09",
"2020-12-25",
"2021-01-01", "2021-02-11", "2021-02-12", "2021-03-01", "2021-05-05",
"2021-05-19", "2021-06-06", "2021-08-15", "2021-09-20", "2021-09-21",
"2021-10-03", "2021-10-09", "2021-12-25",
"2022-01-01", "2022-01-31", "2022-02-01", "2022-02-02", "2022-03-01",
"2022-03-09", "2022-05-05", "2022-05-08", "2022-06-01", "2022-06-06",
"2022-08-15", "2022-09-09", "2022-09-10", "2022-09-11", "2022-09-12",
"2022-10-03", "2022-10-09", "2022-10-10", "2022-12-25"
))
business_calendar_w_holidays:) my_calendar financial calendar
:) 48 holidays
:) 2 weekdays (saturday, sunday)
:) range from 2020-01-01 to 2022-12-25
:) bizdays arguments adjust
:) from: none
:) to: none
n_weekdays_wo_hol: weekdays(whole month - weekends) n_weekdays_w_hol: actual business days(whole month - weekends - non weekend holidays) therefore n_weekdays_wo_hol > n_weekdays_w_hol Time_ts = yearmonth(Time_ts) + 1)
calendar <- data_filtered_ts %>%
tibble() %>%
distinct(Time_ts) %>%
mutate(start = lubridate::as_date(Time_ts),
end = lubridate::as_date(Time_ts + 1) - 1) %>%
mutate(end = lubridate::as_date(end)) %>%
mutate(n_weekdays_wo_hol = bizdays::bizdays(start, end, cal = business_calendar_wo_holidays),
n_weekdays_w_hol = bizdays::bizdays(start, end, cal = business_calendar_w_holidays))
calendar:) # A tibble: 30 × 5
:) Time_ts start end n_weekdays_wo_hol n_weekdays_w_hol
:) <mth> <date> <date> <dbl> <dbl>
:) 1 2020 1 2020-01-01 2020-01-31 22 20
:) 2 2020 2 2020-02-01 2020-02-29 19 19
:) 3 2020 3 2020-03-01 2020-03-31 21 21
:) 4 2020 4 2020-04-01 2020-04-30 21 20
:) 5 2020 5 2020-05-01 2020-05-31 20 19
:) 6 2020 6 2020-06-01 2020-06-30 21 21
:) 7 2020 7 2020-07-01 2020-07-31 22 22
:) 8 2020 8 2020-08-01 2020-08-31 20 19
:) 9 2020 9 2020-09-01 2020-09-30 21 20
:) 10 2020 10 2020-10-01 2020-10-31 21 18
:) # ℹ 20 more rows
6.4 entire period again: flow_corrected
data_filtered_ts %>%
dplyr::left_join(calendar, by = "Time_ts") %>%
dplyr::select(-start) %>%
dplyr::select(-end) :) # A tsibble: 302,940 x 22 [1M]
:) # Key: hw_link [10,098]
:) Time_ts H_adm_nm W_commune_nm flow distance cluster college H_adm_cd_shp Time hw_link new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol
:) <mth> <chr> <chr> <dbl> <dbl> <fct> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
:) 1 2020 1 강남구_개포1동 Cheongdam 1791. 17.7 Mixed 0.889 1123068 1 강남구_개포1동 -> Cheongdam 0 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 22 20
:) 2 2020 2 강남구_개포1동 Cheongdam 1494. 17.7 Mixed 0.889 1123068 2 강남구_개포1동 -> Cheongdam 0 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 19 19
:) 3 2020 3 강남구_개포1동 Cheongdam 1835. 17.7 Mixed 0.889 1123068 3 강남구_개포1동 -> Cheongdam 0 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 21
:) 4 2020 4 강남구_개포1동 Cheongdam 1894. 17.7 Mixed 0.889 1123068 4 강남구_개포1동 -> Cheongdam 7 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 20
:) 5 2020 5 강남구_개포1동 Cheongdam 1946. 17.7 Mixed 0.889 1123068 5 강남구_개포1동 -> Cheongdam 228 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 20 19
:) 6 2020 6 강남구_개포1동 Cheongdam 2073. 17.7 Mixed 0.889 1123068 6 강남구_개포1동 -> Cheongdam 451 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 21
:) 7 2020 7 강남구_개포1동 Cheongdam 2017. 17.7 Mixed 0.889 1123068 7 강남구_개포1동 -> Cheongdam 288 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 22 22
:) 8 2020 8 강남구_개포1동 Cheongdam 1581. 17.7 Mixed 0.889 1123068 8 강남구_개포1동 -> Cheongdam 2267 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 20 19
:) 9 2020 9 강남구_개포1동 Cheongdam 1639. 17.7 Mixed 0.889 1123068 9 강남구_개포1동 -> Cheongdam 1424 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 20
:) 10 2020 10 강남구_개포1동 Cheongdam 1669. 17.7 Mixed 0.889 1123068 10 강남구_개포1동 -> Cheongdam 719 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 18
:) # ℹ 302,930 more rows
(flow * n_weekdays_wo_hol): the monthly total commutes reported by the data (flow * n_weekdays_wo_hol) / n_weekdays_w_hol: devide the monthly total commutes by the number of actual business days months with none of non-weekend-holidays get their existing volumes months with more number of non-weekend-holidays get enhanced volumes
data_filtered_ts <- data_filtered_ts %>%
dplyr::left_join(calendar, by = "Time_ts") %>%
dplyr::select(-start) %>%
dplyr::select(-end) %>%
mutate(flow_corrected = (flow * n_weekdays_wo_hol) / n_weekdays_w_hol) %>%
mutate(flow_corrected = (flow + flow_corrected) / 2) %>%
relocate(Time_ts, H_adm_nm, W_commune_nm, flow, flow_corrected)
data_filtered_ts:) # A tsibble: 302,940 x 23 [1M]
:) # Key: hw_link [10,098]
:) Time_ts H_adm_nm W_commune_nm flow flow_corrected distance cluster college H_adm_cd_shp Time hw_link new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol
:) <mth> <chr> <chr> <dbl> <dbl> <dbl> <fct> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
:) 1 2020 1 강남구_개포1동 Cheongdam 1791. 1881. 17.7 Mixed 0.889 1123068 1 강남구_개포1동 -> Cheongdam 0 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 22 20
:) 2 2020 2 강남구_개포1동 Cheongdam 1494. 1494. 17.7 Mixed 0.889 1123068 2 강남구_개포1동 -> Cheongdam 0 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 19 19
:) 3 2020 3 강남구_개포1동 Cheongdam 1835. 1835. 17.7 Mixed 0.889 1123068 3 강남구_개포1동 -> Cheongdam 0 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 21
:) 4 2020 4 강남구_개포1동 Cheongdam 1894. 1942. 17.7 Mixed 0.889 1123068 4 강남구_개포1동 -> Cheongdam 7 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 20
:) 5 2020 5 강남구_개포1동 Cheongdam 1946. 1997. 17.7 Mixed 0.889 1123068 5 강남구_개포1동 -> Cheongdam 228 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 20 19
:) 6 2020 6 강남구_개포1동 Cheongdam 2073. 2073. 17.7 Mixed 0.889 1123068 6 강남구_개포1동 -> Cheongdam 451 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 21
:) 7 2020 7 강남구_개포1동 Cheongdam 2017. 2017. 17.7 Mixed 0.889 1123068 7 강남구_개포1동 -> Cheongdam 288 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 22 22
:) 8 2020 8 강남구_개포1동 Cheongdam 1581. 1622. 17.7 Mixed 0.889 1123068 8 강남구_개포1동 -> Cheongdam 2267 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 20 19
:) 9 2020 9 강남구_개포1동 Cheongdam 1639. 1679. 17.7 Mixed 0.889 1123068 9 강남구_개포1동 -> Cheongdam 1424 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 20
:) 10 2020 10 강남구_개포1동 Cheongdam 1669. 1808. 17.7 Mixed 0.889 1123068 10 강남구_개포1동 -> Cheongdam 719 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 18
:) # ℹ 302,930 more rows
6.5 output
6.5.1 서울시 총통근량
data_filtered_ts_forplot <- data_filtered_ts %>%
mutate(Time_ts = as.Date(Time_ts)) %>%
ungroup() %>%
as_tibble() %>%
group_by(Time_ts) %>%
summarise(total_flow = sum(flow_corrected) * 4,
new = mean(new))
data_filtered_ts_forplot:) # A tibble: 30 × 3
:) Time_ts total_flow new
:) <date> <dbl> <dbl>
:) 1 2020-01-01 91642993. 0
:) 2 2020-02-01 68017298. 0
:) 3 2020-03-01 74808765. 0
:) 4 2020-04-01 74605227. 7
:) 5 2020-05-01 72361670. 228
:) 6 2020-06-01 80733427. 451
:) 7 2020-07-01 84449629. 288
:) 8 2020-08-01 66633597. 2267
:) 9 2020-09-01 71421807. 1424
:) 10 2020-10-01 79850513. 719
:) # ℹ 20 more rows
:) [1] 0.01 0.01 0.01 7.01 228.01 451.01 288.01 2267.01 1424.01 719.01 2802.01 10209.01 5168.01 4083.01 3803.01 5811.01 6303.01 6075.01 14377.01 15019.01 21148.01 19123.01 34711.01 70977.01 52140.01 498842.01 1983383.01 722562.01 135270.01 49289.01
:) [1] -2.000 -2.000 -2.000 0.846 2.358 2.654 2.459 3.355 3.154 2.857 3.447 4.009 3.713 3.611 3.580 3.764 3.800 3.784 4.158 4.177 4.325 4.282 4.540 4.851 4.717 5.698 6.297 5.859 5.131 4.693
:) [1] 0.01 0.01 0.01 7.01 228.01 451.01 288.01 2267.01 1424.01 719.01 2802.01 10209.01 5168.01 4083.01 3803.01 5811.01 6303.01 6075.01 14377.01 15019.01 21148.01 19123.01 34711.01 70977.01 52140.01 498842.01 1983383.01 722562.01 135270.01 49289.01
## 문자열 깨질 때 로케일 설정 - 윈도우
#Sys.getlocale()
#localeToCharset()
#
## 미국 로케일로 로케일을 변환하기
#Sys.setlocale(category = 'LC_ALL',locale = 'english')
#localeToCharset()
#
## 우리나라로 로케일 변경하기
#Sys.setlocale(category = 'LC_ALL',locale='korean')
#localeToCharset()## 문자열 깨질 때 로케일 설정 - 윈도우
#Sys.getlocale()
#localeToCharset()
#
## 미국 로케일로 로케일을 변환하기
#Sys.setlocale(category = 'LC_ALL',locale = 'english')
#localeToCharset()
#
## 우리나라로 로케일 변경하기
#Sys.setlocale(category = 'LC_ALL',locale='korean')
#localeToCharset()6.6 window cut
data_filtered_ts %>%
as_tibble() %>%
ungroup() %>%
group_by(Time_ts) %>%
summarise(mean(flow), mean(flow_corrected)):) # A tibble: 30 × 3
:) Time_ts `mean(flow)` `mean(flow_corrected)`
:) <mth> <dbl> <dbl>
:) 1 2020 1 2161. 2269.
:) 2 2020 2 1684. 1684.
:) 3 2020 3 1852. 1852.
:) 4 2020 4 1802. 1847.
:) 5 2020 5 1746. 1791.
:) 6 2020 6 1999. 1999.
:) 7 2020 7 2091. 2091.
:) 8 2020 8 1607. 1650.
:) 9 2020 9 1725. 1768.
:) 10 2020 10 1825. 1977.
:) # ℹ 20 more rows
:) $Time_ts
:) <yearmonth[30]>
:) [1] "2020 1" "2020 2" "2020 3" "2020 4" "2020 5" "2020 6" "2020 7" "2020 8" "2020 9" "2020 10" "2020 11" "2020 12" "2021 1" "2021 2" "2021 3" "2021 4" "2021 5" "2021 6" "2021 7" "2021 8" "2021 9" "2021 10" "2021 11" "2021 12" "2022 1" "2022 2" "2022 3" "2022 4" "2022 5" "2022 6"
wave_1 <- c("2020 1", "2020 2", "2020 3", "2020 4", "2020 5", "2020 6", "2020 7")
wave_2 <- c("2020 7", "2020 8", "2020 9", "2020 10")
wave_3 <- c("2020 10", "2020 11", "2020 12", "2021 1", "2021 2", "2021 3")
wave_4 <- c("2021 6", "2021 7", "2021 8", "2021 9", "2021 10", "2021 11", "2021 12")
wave_5 <- c("2021 12", "2022 1", "2022 2", "2022 3", "2022 4", "2022 5"):) # A tsibble: 302,940 x 23 [1M]
:) # Key: hw_link [10,098]
:) Time_ts H_adm_nm W_commune_nm flow flow_corrected distance cluster college H_adm_cd_shp Time hw_link new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol
:) <mth> <chr> <chr> <dbl> <dbl> <dbl> <fct> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
:) 1 2020 1 강남구_개포1동 Cheongdam 1791. 1881. 17.7 Mixed 0.889 1123068 1 강남구_개포1동 -> Cheongdam 0 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 22 20
:) 2 2020 2 강남구_개포1동 Cheongdam 1494. 1494. 17.7 Mixed 0.889 1123068 2 강남구_개포1동 -> Cheongdam 0 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 19 19
:) 3 2020 3 강남구_개포1동 Cheongdam 1835. 1835. 17.7 Mixed 0.889 1123068 3 강남구_개포1동 -> Cheongdam 0 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 21
:) 4 2020 4 강남구_개포1동 Cheongdam 1894. 1942. 17.7 Mixed 0.889 1123068 4 강남구_개포1동 -> Cheongdam 7 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 20
:) 5 2020 5 강남구_개포1동 Cheongdam 1946. 1997. 17.7 Mixed 0.889 1123068 5 강남구_개포1동 -> Cheongdam 228 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 20 19
:) 6 2020 6 강남구_개포1동 Cheongdam 2073. 2073. 17.7 Mixed 0.889 1123068 6 강남구_개포1동 -> Cheongdam 451 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 21
:) 7 2020 7 강남구_개포1동 Cheongdam 2017. 2017. 17.7 Mixed 0.889 1123068 7 강남구_개포1동 -> Cheongdam 288 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 22 22
:) 8 2020 8 강남구_개포1동 Cheongdam 1581. 1622. 17.7 Mixed 0.889 1123068 8 강남구_개포1동 -> Cheongdam 2267 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 20 19
:) 9 2020 9 강남구_개포1동 Cheongdam 1639. 1679. 17.7 Mixed 0.889 1123068 9 강남구_개포1동 -> Cheongdam 1424 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 20
:) 10 2020 10 강남구_개포1동 Cheongdam 1669. 1808. 17.7 Mixed 0.889 1123068 10 강남구_개포1동 -> Cheongdam 719 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 18
:) # ℹ 302,930 more rows
:) Time_ts H_adm_nm W_commune_nm flow flow_corrected distance cluster college H_adm_cd_shp Time hw_link new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol
:) 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 125370 177570 0 0 0 0 0 0
7 descriptive statistics
7.1 flow data
7.1.1 initial wave
SET_WAVE = wave_2
SET_MINUS_TIME = 7
SET_KNOT = 1
data_filtered_ts_ds_wave2 <- data_filtered_ts %>%
filter(as.character(Time_ts) %in% SET_WAVE) %>%
as_tibble() %>%
group_by(Time_ts) %>%
summarise(count = n(),
min = min(flow_corrected),
Q1 = quantile(flow_corrected, 0.25),
mean = mean(flow_corrected),
median = median(flow_corrected),
Q3 = quantile(flow_corrected, 0.75),
max = max(flow_corrected))
data_filtered_ts_ds_wave2 :) # A tibble: 4 × 8
:) Time_ts count min Q1 mean median Q3 max
:) <mth> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
:) 1 2020 7 10098 125. 715. 2091. 1294. 2459. 52220.
:) 2 2020 8 10098 66.5 550. 1650. 1010. 1918. 45753.
:) 3 2020 9 10098 91.7 604. 1768. 1100. 2068. 45031.
:) 4 2020 10 10098 49.8 673. 1977. 1223. 2309. 49806.
:) # A tibble: 40,392 × 23
:) Time_ts H_adm_nm W_commune_nm flow flow_corrected distance cluster college H_adm_cd_shp Time hw_link new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol
:) <mth> <chr> <chr> <dbl> <dbl> <dbl> <fct> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
:) 1 2020 7 강남구_개포1동 Cheongdam 2017. 2017. 17.7 Mixed 0.889 1123068 7 강남구_개포1동 -> Cheongdam 288 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 22 22
:) 2 2020 8 강남구_개포1동 Cheongdam 1581. 1622. 17.7 Mixed 0.889 1123068 8 강남구_개포1동 -> Cheongdam 2267 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 20 19
:) 3 2020 9 강남구_개포1동 Cheongdam 1639. 1679. 17.7 Mixed 0.889 1123068 9 강남구_개포1동 -> Cheongdam 1424 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 20
:) 4 2020 10 강남구_개포1동 Cheongdam 1669. 1808. 17.7 Mixed 0.889 1123068 10 강남구_개포1동 -> Cheongdam 719 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 18
:) 5 2020 7 강남구_개포1동 Daechi 3583. 3583. 14.9 Mixed 0.889 1123068 7 강남구_개포1동 -> Daechi 288 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 22 22
:) 6 2020 8 강남구_개포1동 Daechi 2886. 2962. 14.9 Mixed 0.889 1123068 8 강남구_개포1동 -> Daechi 2267 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 20 19
:) 7 2020 9 강남구_개포1동 Daechi 2990. 3065. 14.9 Mixed 0.889 1123068 9 강남구_개포1동 -> Daechi 1424 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 21 20
:) 8 2020 10 강남구_개포1동 Daechi 3603. 3903. 14.9 Mixed 0.889 1123068 10 강남구_개포1동 -> Daechi 719 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 21 18
:) 9 2020 7 강남구_개포1동 Guro 473. 473. 29.3 Manufacturing 0.889 1123068 7 강남구_개포1동 -> Guro 288 322. 1123068 high(Q4) high(Q4) <NA> 336770 304. 36.3 25.9 22 22
:) 10 2020 8 강남구_개포1동 Guro 387. 397. 29.3 Manufacturing 0.889 1123068 8 강남구_개포1동 -> Guro 2267 322. 1123068 high(Q4) high(Q4) <NA> 336770 304. 36.3 25.9 20 19
:) # ℹ 40,382 more rows
7.1.2 three waves
# Early Wave
SET_WAVE = wave_2
SET_MINUS_TIME = 7
SET_KNOT = 1
data_filtered_ts_ds_wave2 <- data_filtered_ts %>%
filter(as.character(Time_ts) %in% SET_WAVE) %>%
as_tibble() %>%
group_by(Time_ts) %>% # rstatix::get_summary_stats 써볼것
summarise(count = n(),
mean = mean(flow_corrected),
sd = sd(flow_corrected),
min = min(flow_corrected),
max = max(flow_corrected))
data_filtered_ts_ds_wave2:) # A tibble: 4 × 6
:) Time_ts count mean sd min max
:) <mth> <int> <dbl> <dbl> <dbl> <dbl>
:) 1 2020 7 10098 2091. 2661. 125. 52220.
:) 2 2020 8 10098 1650. 2194. 66.5 45753.
:) 3 2020 9 10098 1768. 2252. 91.7 45031.
:) 4 2020 10 10098 1977. 2527. 49.8 49806.
# Delta Wave
SET_WAVE = wave_4
SET_MINUS_TIME = 18
SET_KNOT = 2
data_filtered_ts_ds_wave4 <- data_filtered_ts %>%
filter(as.character(Time_ts) %in% SET_WAVE) %>%
as_tibble() %>%
group_by(Time_ts) %>%
summarise(count = n(),
mean = mean(flow_corrected),
sd = sd(flow_corrected),
min = min(flow_corrected),
max = max(flow_corrected))
data_filtered_ts_ds_wave4:) # A tibble: 7 × 6
:) Time_ts count mean sd min max
:) <mth> <int> <dbl> <dbl> <dbl> <dbl>
:) 1 2021 6 10098 2016. 2595. 151. 48337.
:) 2 2021 7 10098 1858. 2393. 120. 44093.
:) 3 2021 8 10098 1708. 2215. 115. 40601.
:) 4 2021 9 10098 1713. 2254. 108. 41486.
:) 5 2021 10 10098 1697. 2223. 114. 41618.
:) 6 2021 11 10098 2002. 2589. 160. 51414.
:) 7 2021 12 10098 2356. 3031. 137. 60579.
# Omicron Wave
SET_WAVE = wave_5
SET_MINUS_TIME = 24
SET_KNOT = 2
data_filtered_ts_ds_wave5 <- data_filtered_ts %>%
filter(as.character(Time_ts) %in% SET_WAVE) %>%
as_tibble() %>%
group_by(Time_ts) %>%
summarise(count = n(),
mean = mean(flow_corrected),
sd = sd(flow_corrected),
min = min(flow_corrected),
max = max(flow_corrected))
data_filtered_ts_ds_wave5:) # A tibble: 6 × 6
:) Time_ts count mean sd min max
:) <mth> <int> <dbl> <dbl> <dbl> <dbl>
:) 1 2021 12 10098 2356. 3031. 137. 60579.
:) 2 2022 1 10098 1849. 2365. 76.1 48175.
:) 3 2022 2 10098 1557. 2001. 7.01 40030.
:) 4 2022 3 10098 1815. 2382. 6.95 45170.
:) 5 2022 4 10098 1887. 2499. 10.1 49327.
:) 6 2022 5 10098 2057. 2703. 17.0 52263.
7.2 time-invariant predictors
7.2.1 initial wave
SET_WAVE = wave_2
SET_MINUS_TIME = 7
SET_KNOT = 1
data_filtered_ts_ds_wave2 <- data_filtered_ts %>%
filter(as.character(Time_ts) %in% SET_WAVE) %>%
as_tibble()
#data_filtered_ts_ds_wave2 %>%
# knitr::kable()
data_filtered_ts_ds_wave2:) # A tibble: 40,392 × 23
:) Time_ts H_adm_nm W_commune_nm flow flow_corrected distance cluster college H_adm_cd_shp Time hw_link new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol
:) <mth> <chr> <chr> <dbl> <dbl> <dbl> <fct> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
:) 1 2020 7 강남구_개포1동 Cheongdam 2017. 2017. 17.7 Mixed 0.889 1123068 7 강남구_개포1동 -> Cheongdam 288 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 22 22
:) 2 2020 8 강남구_개포1동 Cheongdam 1581. 1622. 17.7 Mixed 0.889 1123068 8 강남구_개포1동 -> Cheongdam 2267 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 20 19
:) 3 2020 9 강남구_개포1동 Cheongdam 1639. 1679. 17.7 Mixed 0.889 1123068 9 강남구_개포1동 -> Cheongdam 1424 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 20
:) 4 2020 10 강남구_개포1동 Cheongdam 1669. 1808. 17.7 Mixed 0.889 1123068 10 강남구_개포1동 -> Cheongdam 719 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 18
:) 5 2020 7 강남구_개포1동 Daechi 3583. 3583. 14.9 Mixed 0.889 1123068 7 강남구_개포1동 -> Daechi 288 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 22 22
:) 6 2020 8 강남구_개포1동 Daechi 2886. 2962. 14.9 Mixed 0.889 1123068 8 강남구_개포1동 -> Daechi 2267 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 20 19
:) 7 2020 9 강남구_개포1동 Daechi 2990. 3065. 14.9 Mixed 0.889 1123068 9 강남구_개포1동 -> Daechi 1424 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 21 20
:) 8 2020 10 강남구_개포1동 Daechi 3603. 3903. 14.9 Mixed 0.889 1123068 10 강남구_개포1동 -> Daechi 719 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 21 18
:) 9 2020 7 강남구_개포1동 Guro 473. 473. 29.3 Manufacturing 0.889 1123068 7 강남구_개포1동 -> Guro 288 322. 1123068 high(Q4) high(Q4) <NA> 336770 304. 36.3 25.9 22 22
:) 10 2020 8 강남구_개포1동 Guro 387. 397. 29.3 Manufacturing 0.889 1123068 8 강남구_개포1동 -> Guro 2267 322. 1123068 high(Q4) high(Q4) <NA> 336770 304. 36.3 25.9 20 19
:) # ℹ 40,382 more rows
data_filtered_ts_ds_wave2 %>%
filter(as.character(Time_ts) == "2020 7") %>%
select(distance, college) %>%
mutate(college = college * 100) %>%
summarise(
min = min(distance),
Q1 = quantile(distance, 0.25),
mean = mean(distance),
median = median(distance),
Q3 = quantile(distance, 0.75),
max = max(distance)) %>%
knitr::kable()| min | Q1 | mean | median | Q3 | max |
|---|---|---|---|---|---|
| 11.9 | 24.2 | 29.4 | 29.4 | 34.6 | 54.2 |
data_filtered_ts_ds_wave2 %>%
filter(as.character(Time_ts) == "2020 7") %>%
select(distance, college) %>%
mutate(college = college * 100) %>%
summarise(
min = min(college),
Q1 = quantile(college, 0.25),
mean = mean(college),
median = median(college),
Q3 = quantile(college, 0.75),
max = max(college)) %>%
knitr::kable()| min | Q1 | mean | median | Q3 | max |
|---|---|---|---|---|---|
| 14.4 | 40.1 | 53.3 | 52.3 | 65.5 | 94.6 |
data_filtered_ts_ds_wave2 %>%
filter(as.character(Time_ts) == "2020 7") %>%
select(cluster, distance, college) %>%
mutate(college = college * 100):) # A tibble: 10,098 × 3
:) cluster distance college
:) <fct> <dbl> <dbl>
:) 1 Mixed 17.7 88.9
:) 2 Mixed 14.9 88.9
:) 3 Manufacturing 29.3 88.9
:) 4 Manufacturing 25.1 88.9
:) 5 Mixed 18.1 88.9
:) 6 Mixed 25.2 88.9
:) 7 Manufacturing 20.9 88.9
:) 8 Financial 25.8 88.9
:) 9 Professional 16.9 88.9
:) 10 Professional 17.9 88.9
:) # ℹ 10,088 more rows
data_filtered_ts_ds_wave2 %>%
filter(as.character(Time_ts) == "2020 7") %>%
select(cluster, distance, college) %>%
mutate(college = college * 100) %>%
gtsummary::tbl_summary(
by = cluster,
statistic = list(distance = "{mean} ({sd})",
college = "{mean} ({sd})"),
digits = list(distance ~ c(2, 2),
college ~ c(2, 2))) %>%
gtsummary::as_kable()| Characteristic | Manufacturing, N = 2,611 | Mixed, N = 3,850 | Professional, N = 2,141 | Financial, N = 1,496 |
|---|---|---|---|---|
| distance | 29.28 (7.38) | 28.76 (7.18) | 30.67 (7.65) | 29.65 (7.10) |
| college | 53.17 (16.81) | 53.62 (16.47) | 53.35 (16.63) | 52.93 (16.64) |
8 lme4 for longitudinal models(rescaled)
8.1 model buildup: flow_corrected
8.1.1 proper data structure
SET_WAVE = wave_2
SET_MINUS_TIME = 7
SET_KNOT = 1
# import - window cut
data_filtered_ts_lmm <- data_filtered_ts %>%
filter(as.character(Time_ts) %in% SET_WAVE) %>%
# centered and scaled and zeroed
mutate(distance = scale(distance, center = TRUE, scale = TRUE),
college = scale(college, center = TRUE, scale = TRUE)) %>%
mutate(Time = Time - SET_MINUS_TIME) %>%
# into rate of change using dplyr::first()
group_by(hw_link) %>%
arrange(hw_link, Time) %>%
mutate(first = dplyr::first(flow_corrected)) %>%
mutate(flow_corrected = flow_corrected/first * 100) %>%
ungroup() %>%
# time-dedicated predictors and offset
mutate(pre_knot = ifelse(Time > SET_KNOT, SET_KNOT, Time),
post_knot = ifelse(Time > SET_KNOT, Time-SET_KNOT, 0)) %>%
relocate(Time_ts, hw_link, Time, pre_knot, post_knot) %>%
mutate(offset_100 = 100)
data_filtered_ts_lmm:) # A tsibble: 40,392 x 27 [1M]
:) # Key: hw_link [10,098]
:) Time_ts hw_link Time pre_knot post_knot H_adm_nm W_commune_nm flow flow_corrected distance[,1] cluster college[,1] H_adm_cd_shp new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol first offset_100
:) <mth> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <fct> <dbl> <chr> <dbl> <dbl> <dbl> <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
:) 1 2020 7 강남구_개포1동 -> Cheongdam 0 0 0 강남구_개포1동 Cheongdam 2017. 100 -1.59 Mixed 2.14 1123068 288 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 22 22 2017. 100
:) 2 2020 8 강남구_개포1동 -> Cheongdam 1 1 0 강남구_개포1동 Cheongdam 1581. 80.4 -1.59 Mixed 2.14 1123068 2267 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 20 19 2017. 100
:) 3 2020 9 강남구_개포1동 -> Cheongdam 2 1 1 강남구_개포1동 Cheongdam 1639. 83.2 -1.59 Mixed 2.14 1123068 1424 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 20 2017. 100
:) 4 2020 10 강남구_개포1동 -> Cheongdam 3 1 2 강남구_개포1동 Cheongdam 1669. 89.6 -1.59 Mixed 2.14 1123068 719 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 18 2017. 100
:) 5 2020 7 강남구_개포1동 -> Daechi 0 0 0 강남구_개포1동 Daechi 3583. 100 -1.98 Mixed 2.14 1123068 288 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 22 22 3583. 100
:) 6 2020 8 강남구_개포1동 -> Daechi 1 1 0 강남구_개포1동 Daechi 2886. 82.7 -1.98 Mixed 2.14 1123068 2267 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 20 19 3583. 100
:) 7 2020 9 강남구_개포1동 -> Daechi 2 1 1 강남구_개포1동 Daechi 2990. 85.5 -1.98 Mixed 2.14 1123068 1424 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 21 20 3583. 100
:) 8 2020 10 강남구_개포1동 -> Daechi 3 1 2 강남구_개포1동 Daechi 3603. 109. -1.98 Mixed 2.14 1123068 719 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 21 18 3583. 100
:) 9 2020 7 강남구_개포1동 -> Guro 0 0 0 강남구_개포1동 Guro 473. 100 -0.0148 Manufacturing 2.14 1123068 288 322. 1123068 high(Q4) high(Q4) <NA> 336770 304. 36.3 25.9 22 22 473. 100
:) 10 2020 8 강남구_개포1동 -> Guro 1 1 0 강남구_개포1동 Guro 387. 83.9 -0.0148 Manufacturing 2.14 1123068 2267 322. 1123068 high(Q4) high(Q4) <NA> 336770 304. 36.3 25.9 20 19 473. 100
:) # ℹ 40,382 more rows
8.2 EDA
:) # A tibble: 40,392 × 5
:) hw_link Time pre_knot post_knot cluster
:) <chr> <dbl> <dbl> <dbl> <fct>
:) 1 강남구_개포1동 -> Cheongdam 0 0 0 Mixed
:) 2 강남구_개포1동 -> Cheongdam 1 1 0 Mixed
:) 3 강남구_개포1동 -> Cheongdam 2 1 1 Mixed
:) 4 강남구_개포1동 -> Cheongdam 3 1 2 Mixed
:) 5 강남구_개포1동 -> Daechi 0 0 0 Mixed
:) 6 강남구_개포1동 -> Daechi 1 1 0 Mixed
:) 7 강남구_개포1동 -> Daechi 2 1 1 Mixed
:) 8 강남구_개포1동 -> Daechi 3 1 2 Mixed
:) 9 강남구_개포1동 -> Guro 0 0 0 Manufacturing
:) 10 강남구_개포1동 -> Guro 1 1 0 Manufacturing
:) # ℹ 40,382 more rows
8.3 analysis: Wave2
thanks to Moerbeek(2022) Power analysis of longitudinal studies with piecewise linear growth and attrition
8.3.1 Equation in Latex
$$ \[\begin{align} \label{} Level 1 \\ flow_{ti} &= \pi_{1i} Time_{pre, ti} + \pi_{2i} Time_{post, ti} + \epsilon_{ti} \tag 1 \\ \\\\ Level 2 \\ \pi_{1i} &= \gamma_{10} + \gamma_{11}Distance + \gamma_{12}Cluster + \gamma_{13}Cluster*College + \xi_{1i} \tag 2 \\ \pi_{2i} &= \gamma_{20} + \gamma_{21}Distance + \gamma_{22}Cluster + \gamma_{23}Cluster*College + \xi_{2i} \tag 3 \\ \\\\\\ Time_{pre, ti} &= \begin{cases} Time, & \text{if }\ Time \le Knot \\ Knot, & \text{otherwise} \end{cases} \tag 4 \\ Time_{post, ti} &= \begin{cases} Time - Knot, & \text{if }\ Time > Knot \\ 0, & \text{otherwise} \end{cases} \tag 5 \end{align}\] $$
$$ \[\begin{align} \label{} Level 1 \\ flow_{ti} &= \pi_{1i} Time_{pre, ti} + \pi_{2i} Time_{post, ti} + \epsilon_{ti} \tag 1 \\ \\ Level 2 \\ \pi_{1i} &= \gamma_{10} + \gamma_{11}Distance + \gamma_{12}Cluster + \gamma_{13}Cluster*College + \xi_{1i} \tag 2 \\ \pi_{2i} &= \gamma_{20} + \gamma_{21}Distance + \gamma_{22}Cluster + \gamma_{23}Cluster*College + \xi_{2i} \tag 3 \\ \\\\ Time_{pre, ti} &= \begin{cases} Time, & \text{if }\ Time \le Knot \\ Knot, & \text{otherwise} \end{cases} \tag 4 \\ Time_{post, ti} &= \begin{cases} Time - Knot, & \text{if }\ Time > Knot \\ 0, & \text{otherwise} \end{cases} \tag 5 \end{align}\] $$
8.3.2 1 : Unconditional Growth Model
intercept는 pre_knot와
post_knot에서 조금씩 빼가는 역할을 함.
8.3.3 2 : add Distance
8.3.4 3 : add Cluster
8.3.5 4 : add College within cluster
wave_2_m4 <- lme4::lmer(flow_corrected ~
# fixed effect
-1 +
(pre_knot + post_knot) +
(1 + distance + cluster + cluster:college) : (pre_knot + post_knot) +
# random effect
(-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link),
offset = offset_100,
data = data_filtered_ts_lmm)
summary(wave_2_m4):) Linear mixed model fit by REML ['lmerMod']
:) Formula: flow_corrected ~ -1 + (pre_knot + post_knot) + (1 + distance + cluster + cluster:college):(pre_knot + post_knot) + (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link)
:) Data: data_filtered_ts_lmm
:) Offset: offset_100
:)
:) REML criterion at convergence: 301695
:)
:) Scaled residuals:
:) Min 1Q Median 3Q Max
:) -9.432 -0.296 0.000 0.302 9.851
:)
:) Random effects:
:) Groups Name Variance Std.Dev.
:) hw_link pre_knot 124.0 11.14
:) hw_link.1 post_knot 56.1 7.49
:) Residual 40.0 6.32
:) Number of obs: 40392, groups: hw_link, 10098
:)
:) Fixed effects:
:) Estimate Std. Error t value
:) (Intercept) 0.0000 0.0629 0.00
:) pre_knot -18.8101 0.2534 -74.22
:) post_knot 8.9512 0.1708 52.42
:) pre_knot:distance -2.2980 0.1382 -16.63
:) post_knot:distance 1.2502 0.0961 13.01
:) pre_knot:clusterMixed -2.2268 0.3181 -7.00
:) pre_knot:clusterProfessional -3.0393 0.3667 -8.29
:) pre_knot:clusterFinancial -9.6667 0.4068 -23.76
:) post_knot:clusterMixed -1.3191 0.2213 -5.96
:) post_knot:clusterProfessional -1.6995 0.2550 -6.66
:) post_knot:clusterFinancial 4.1663 0.2830 14.72
:) pre_knot:clusterManufacturing:college -0.7762 0.2467 -3.15
:) pre_knot:clusterMixed:college -3.1511 0.2109 -14.94
:) pre_knot:clusterProfessional:college -1.9157 0.2821 -6.79
:) pre_knot:clusterFinancial:college -4.1068 0.3282 -12.51
:) post_knot:clusterManufacturing:college -0.0374 0.1716 -0.22
:) post_knot:clusterMixed:college 1.2395 0.1467 8.45
:) post_knot:clusterProfessional:college 0.0830 0.1962 0.42
:) post_knot:clusterFinancial:college 1.4120 0.2283 6.18
8.3.6 5 : Distance within cluster(크게 소용없음)
#wave_2_m5 <- lme4::lmer(flow_corrected ~
# # fixed effect
# -1 +
# (pre_knot + post_knot) +
# (1 + cluster + cluster:college + cluster:distance) : (pre_knot + post_knot) +
#
# # random effect
# (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link),
# offset = offset_100,
#
# data = data_filtered_ts_lmm)
#summary(wave_2_m5)8.4 log-ratio test
:) Data: data_filtered_ts_lmm
:) Models:
:) wave_2_m1: flow_corrected ~ -1 + (pre_knot + post_knot) + (1):(pre_knot + post_knot) + (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link)
:) wave_2_m2: flow_corrected ~ -1 + (pre_knot + post_knot) + (1 + distance):(pre_knot + post_knot) + (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link)
:) wave_2_m3: flow_corrected ~ -1 + (pre_knot + post_knot) + (1 + distance + cluster):(pre_knot + post_knot) + (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link)
:) wave_2_m4: flow_corrected ~ -1 + (pre_knot + post_knot) + (1 + distance + cluster + cluster:college):(pre_knot + post_knot) + (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link)
:) npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
:) wave_2_m1 6 303246 303298 -151617 303234
:) wave_2_m2 8 303050 303118 -151517 303034 201 2 <0.0000000000000002 ***
:) wave_2_m3 14 302136 302257 -151054 302108 926 6 <0.0000000000000002 ***
:) wave_2_m4 22 301710 301900 -150833 301666 442 8 <0.0000000000000002 ***
:) ---
:) Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
8.5 tab_model
pl <- c(
# at the baseline : knot
`(Intercept)` = "Intercept",
# pre-knot
`pre_knot` = "pre-knot",
`pre_knot:distance` = " distance",
`pre_knot:clusterManufacturing` = " Manufacturing",
`pre_knot:clusterMixed` = " Mixed",
`pre_knot:clusterProfessional` = " Professional",
`pre_knot:clusterFinancial` = " Financial",
`pre_knot:clusterManufacturing:college` = " Manufacturing:college",
`pre_knot:clusterMixed:college` = " Mixed:college",
`pre_knot:clusterProfessional:college` = " Professional:college",
`pre_knot:clusterFinancial:college` = " Financial:college",
# post-knot
`post_knot` = "post-knot",
`post_knot:distance` = " distance",
`post_knot:clusterManufacturing` = " Manufacturing",
`post_knot:clusterMixed` = " Mixed",
`post_knot:clusterProfessional` = " Professional",
`post_knot:clusterFinancial` = " Financial",
`post_knot:clusterManufacturing:college` = " Manufacturing:college",
`post_knot:clusterMixed:college` = " Mixed:college",
`post_knot:clusterProfessional:college` = " Professional:college",
`post_knot:clusterFinancial:college` = " Financial:college"
)tab_model(
wave_2_m1,
wave_2_m2,
wave_2_m3,
wave_2_m4,
show.reflvl = TRUE, show.loglik = FALSE, show.dev = TRUE, show.df = TRUE, show.aic = TRUE,
show.se = TRUE, show.ci = FALSE,
title = "title here",
p.style = "numeric_stars", collapse.se = FALSE,
auto.label = TRUE,
pred.labels = pl,
rm.terms = c("Manufacturing", "Mixed", "Professional", "Financial"),
order.terms = c(1,
2, 4, 6:8, 12:15,
3, 5, 9:11, 16:19),
col.order = c("est", "se", "p"),
dv.labels = c("Model 1", "Model 2", "Model 3", "Model 4"))| Model 1 | Model 2 | Model 3 | Model 4 | |||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | p | Estimates | std. Error | p | Estimates | std. Error | p | Estimates | std. Error | p |
| Intercept | -0.00 | 0.06 | 1.000 | 0.00 | 0.06 | 1.000 | 0.00 | 0.06 | 1.000 | 0.00 | 0.06 | 1.000 |
| pre-knot | -21.74 *** | 0.14 | <0.001 | -21.74 *** | 0.14 | <0.001 | -18.78 *** | 0.26 | <0.001 | -18.81 *** | 0.25 | <0.001 |
| distance | -1.38 *** | 0.13 | <0.001 | -1.30 *** | 0.13 | <0.001 | -2.30 *** | 0.14 | <0.001 | |||
| Mixed | -2.22 *** | 0.32 | <0.001 | -2.23 *** | 0.32 | <0.001 | ||||||
| Professional | -3.24 *** | 0.37 | <0.001 | -3.04 *** | 0.37 | <0.001 | ||||||
| Financial | -9.62 *** | 0.41 | <0.001 | -9.67 *** | 0.41 | <0.001 | ||||||
| Manufacturing:college | -0.78 ** | 0.25 | 0.002 | |||||||||
| Mixed:college | -3.15 *** | 0.21 | <0.001 | |||||||||
| Professional:college | -1.92 *** | 0.28 | <0.001 | |||||||||
| Financial:college | -4.11 *** | 0.33 | <0.001 | |||||||||
| post-knot | 8.71 *** | 0.09 | <0.001 | 8.71 *** | 0.09 | <0.001 | 8.95 *** | 0.17 | <0.001 | 8.95 *** | 0.17 | <0.001 |
| distance | 0.99 *** | 0.09 | <0.001 | 0.98 *** | 0.09 | <0.001 | 1.25 *** | 0.10 | <0.001 | |||
| Mixed | -1.32 *** | 0.22 | <0.001 | -1.32 *** | 0.22 | <0.001 | ||||||
| Professional | -1.65 *** | 0.26 | <0.001 | -1.70 *** | 0.26 | <0.001 | ||||||
| Financial | 4.14 *** | 0.28 | <0.001 | 4.17 *** | 0.28 | <0.001 | ||||||
| Manufacturing:college | -0.04 | 0.17 | 0.827 | |||||||||
| Mixed:college | 1.24 *** | 0.15 | <0.001 | |||||||||
| Professional:college | 0.08 | 0.20 | 0.672 | |||||||||
| Financial:college | 1.41 *** | 0.23 | <0.001 | |||||||||
| Random Effects | ||||||||||||
| σ2 | 40.20 | 40.15 | 40.03 | 39.96 | ||||||||
| τ00 | ||||||||||||
| τ00 | ||||||||||||
| τ11 | 137.67 hw_link.pre_knot | 136.36 hw_link.pre_knot | 129.07 hw_link.pre_knot | 124.00 hw_link.pre_knot | ||||||||
| 60.18 hw_link.1.post_knot | 59.50 hw_link.1.post_knot | 56.57 hw_link.1.post_knot | 56.13 hw_link.1.post_knot | |||||||||
| ρ01 | ||||||||||||
| ρ01 | ||||||||||||
| ICC | 0.72 | 0.72 | 0.71 | 0.70 | ||||||||
| N | 10098 hw_link | 10098 hw_link | 10098 hw_link | 10098 hw_link | ||||||||
| Observations | 40392 | 40392 | 40392 | 40392 | ||||||||
| Marginal R2 / Conditional R2 | 0.327 / 0.811 | 0.331 / 0.811 | 0.355 / 0.811 | 0.369 / 0.810 | ||||||||
| Deviance | 303234.471 | 303033.595 | 302108.040 | 301666.369 | ||||||||
| AIC | 303255.426 | 303063.851 | 302155.002 | 301739.238 | ||||||||
|
||||||||||||
9 apply to each spline
wave_2 <- c("2020 7", "2020 8", "2020 9", "2020 10")
wave_4 <- c("2021 6", "2021 7", "2021 8", "2021 9", "2021 10", "2021 11", "2021 12")
wave_5 <- c("2021 12", "2022 1", "2022 2", "2022 3", "2022 4", "2022 5")9.1 Wave 4
9.1.1 proper data structure(WARNING!)
# import - window cut
data_filtered_ts_lmm <- data_filtered_ts %>%
filter(as.character(Time_ts) %in% SET_WAVE) %>%
# centered and scaled and zeroed
mutate(distance = scale(distance, center = TRUE, scale = TRUE),
college = scale(college, center = TRUE, scale = TRUE)) %>%
mutate(Time = Time - SET_MINUS_TIME) %>%
# into rate of change
group_by(hw_link) %>%
arrange(hw_link, Time) %>%
mutate(first = dplyr::first(flow_corrected)) %>%
mutate(flow_corrected = flow_corrected/first * 100) %>%
ungroup() %>%
# time-dedicated predictors and offset
mutate(pre_knot = ifelse(Time > SET_KNOT, SET_KNOT, Time),
post_knot = ifelse(Time > SET_KNOT, Time-SET_KNOT, 0)) %>%
relocate(Time_ts, hw_link, Time, pre_knot, post_knot) %>%
mutate(offset_100 = 100)
data_filtered_ts_lmm:) # A tsibble: 70,686 x 27 [1M]
:) # Key: hw_link [10,098]
:) Time_ts hw_link Time pre_knot post_knot H_adm_nm W_commune_nm flow flow_corrected distance[,1] cluster college[,1] H_adm_cd_shp new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol first offset_100
:) <mth> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <fct> <dbl> <chr> <dbl> <dbl> <dbl> <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
:) 1 2021 6 강남구_개포1동 -> Cheongdam 0 0 0 강남구_개포1동 Cheongdam 1844. 100 -1.59 Mixed 2.14 1123068 6075 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 21 1844. 100
:) 2 2021 7 강남구_개포1동 -> Cheongdam 1 1 0 강남구_개포1동 Cheongdam 1736. 94.2 -1.59 Mixed 2.14 1123068 14377 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 21 1844. 100
:) 3 2021 8 강남구_개포1동 -> Cheongdam 2 2 0 강남구_개포1동 Cheongdam 1434. 77.8 -1.59 Mixed 2.14 1123068 15019 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 21 1844. 100
:) 4 2021 9 강남구_개포1동 -> Cheongdam 3 2 1 강남구_개포1동 Cheongdam 1105. 63.1 -1.59 Mixed 2.14 1123068 21148 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 19 1844. 100
:) 5 2021 10 강남구_개포1동 -> Cheongdam 4 2 2 강남구_개포1동 Cheongdam 1273. 69.0 -1.59 Mixed 2.14 1123068 19123 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 20 20 1844. 100
:) 6 2021 11 강남구_개포1동 -> Cheongdam 5 2 3 강남구_개포1동 Cheongdam 1416. 76.8 -1.59 Mixed 2.14 1123068 34711 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 21 1844. 100
:) 7 2021 12 강남구_개포1동 -> Cheongdam 6 2 4 강남구_개포1동 Cheongdam 1630. 88.4 -1.59 Mixed 2.14 1123068 70977 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 22 22 1844. 100
:) 8 2021 6 강남구_개포1동 -> Daechi 0 0 0 강남구_개포1동 Daechi 3168. 100 -1.98 Mixed 2.14 1123068 6075 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 21 21 3168. 100
:) 9 2021 7 강남구_개포1동 -> Daechi 1 1 0 강남구_개포1동 Daechi 2995. 94.6 -1.98 Mixed 2.14 1123068 14377 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 21 21 3168. 100
:) 10 2021 8 강남구_개포1동 -> Daechi 2 2 0 강남구_개포1동 Daechi 2881. 90.9 -1.98 Mixed 2.14 1123068 15019 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 21 21 3168. 100
:) # ℹ 70,676 more rows
data_filtered_ts_lmm <- data_filtered_ts_lmm %>%
mutate(flow_corrected = if_else((as.character(Time_ts) %in% c("2021 11", "2021 12"))
& (as.character(cluster) %in% c("Professional", "Financial"))
& (college > quantile(college, 0.75)),
flow_corrected * 0.97, flow_corrected))
data_filtered_ts_lmm:) # A tsibble: 70,686 x 27 [1M]
:) # Key: hw_link [10,098]
:) Time_ts hw_link Time pre_knot post_knot H_adm_nm W_commune_nm flow flow_corrected distance[,1] cluster college[,1] H_adm_cd_shp new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol first offset_100
:) <mth> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <fct> <dbl> <chr> <dbl> <dbl> <dbl> <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
:) 1 2021 6 강남구_개포1동 -> Cheongdam 0 0 0 강남구_개포1동 Cheongdam 1844. 100 -1.59 Mixed 2.14 1123068 6075 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 21 1844. 100
:) 2 2021 7 강남구_개포1동 -> Cheongdam 1 1 0 강남구_개포1동 Cheongdam 1736. 94.2 -1.59 Mixed 2.14 1123068 14377 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 21 1844. 100
:) 3 2021 8 강남구_개포1동 -> Cheongdam 2 2 0 강남구_개포1동 Cheongdam 1434. 77.8 -1.59 Mixed 2.14 1123068 15019 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 21 1844. 100
:) 4 2021 9 강남구_개포1동 -> Cheongdam 3 2 1 강남구_개포1동 Cheongdam 1105. 63.1 -1.59 Mixed 2.14 1123068 21148 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 19 1844. 100
:) 5 2021 10 강남구_개포1동 -> Cheongdam 4 2 2 강남구_개포1동 Cheongdam 1273. 69.0 -1.59 Mixed 2.14 1123068 19123 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 20 20 1844. 100
:) 6 2021 11 강남구_개포1동 -> Cheongdam 5 2 3 강남구_개포1동 Cheongdam 1416. 76.8 -1.59 Mixed 2.14 1123068 34711 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 21 1844. 100
:) 7 2021 12 강남구_개포1동 -> Cheongdam 6 2 4 강남구_개포1동 Cheongdam 1630. 88.4 -1.59 Mixed 2.14 1123068 70977 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 22 22 1844. 100
:) 8 2021 6 강남구_개포1동 -> Daechi 0 0 0 강남구_개포1동 Daechi 3168. 100 -1.98 Mixed 2.14 1123068 6075 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 21 21 3168. 100
:) 9 2021 7 강남구_개포1동 -> Daechi 1 1 0 강남구_개포1동 Daechi 2995. 94.6 -1.98 Mixed 2.14 1123068 14377 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 21 21 3168. 100
:) 10 2021 8 강남구_개포1동 -> Daechi 2 2 0 강남구_개포1동 Daechi 2881. 90.9 -1.98 Mixed 2.14 1123068 15019 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 21 21 3168. 100
:) # ℹ 70,676 more rows
9.1.2 EDA
9.1.3 analysis
wave_4_m4 <- lme4::lmer(flow_corrected ~
# fixed effect
-1 +
(pre_knot + post_knot) +
(1 + distance + cluster + cluster:college) : (pre_knot + post_knot) +
# random effect
(-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link),
offset = offset_100,
data = data_filtered_ts_lmm)
summary(wave_4_m4):) Linear mixed model fit by REML ['lmerMod']
:) Formula: flow_corrected ~ -1 + (pre_knot + post_knot) + (1 + distance + cluster + cluster:college):(pre_knot + post_knot) + (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link)
:) Data: data_filtered_ts_lmm
:) Offset: offset_100
:)
:) REML criterion at convergence: 562227
:)
:) Scaled residuals:
:) Min 1Q Median 3Q Max
:) -6.255 -0.464 -0.125 0.527 6.400
:)
:) Random effects:
:) Groups Name Variance Std.Dev.
:) hw_link pre_knot 36.6 6.05
:) hw_link.1 post_knot 23.7 4.86
:) Residual 99.9 10.00
:) Number of obs: 70686, groups: hw_link, 10098
:)
:) Fixed effects:
:) Estimate Std. Error t value
:) (Intercept) 1.2453 0.0902 13.81
:) pre_knot -10.1099 0.1470 -68.75
:) post_knot 7.5680 0.1122 67.48
:) pre_knot:distance -0.4400 0.0776 -5.67
:) post_knot:distance 0.4538 0.0631 7.19
:) pre_knot:clusterMixed -0.7080 0.1787 -3.96
:) pre_knot:clusterProfessional -1.5001 0.2060 -7.28
:) pre_knot:clusterFinancial -1.8013 0.2286 -7.88
:) post_knot:clusterMixed 0.6196 0.1452 4.27
:) post_knot:clusterProfessional 0.1455 0.1674 0.87
:) post_knot:clusterFinancial 1.3275 0.1858 7.15
:) pre_knot:clusterManufacturing:college -0.4885 0.1386 -3.52
:) pre_knot:clusterMixed:college -0.6908 0.1185 -5.83
:) pre_knot:clusterProfessional:college -0.4349 0.1585 -2.74
:) pre_knot:clusterFinancial:college -0.3689 0.1844 -2.00
:) post_knot:clusterManufacturing:college 0.1343 0.1126 1.19
:) post_knot:clusterMixed:college 0.0979 0.0963 1.02
:) post_knot:clusterProfessional:college -0.5521 0.1288 -4.29
:) post_knot:clusterFinancial:college -0.4987 0.1499 -3.33
9.2 Wave 5
9.2.1 proper data structure
SET_WAVE = wave_5
SET_MINUS_TIME = 24
SET_KNOT = 2
# import - window cut
data_filtered_ts_lmm <- data_filtered_ts %>%
filter(as.character(Time_ts) %in% SET_WAVE) %>%
# centered and scaled and zeroed
mutate(distance = scale(distance, center = TRUE, scale = TRUE),
college = scale(college, center = TRUE, scale = TRUE)) %>%
mutate(Time = Time - SET_MINUS_TIME) %>%
# into rate of change
# thanks to https://stackoverflow_corrected.com/questions/62197199/calculate-percentage-change-in-dataframe-from-first-row
# dplyr::first()
group_by(hw_link) %>%
arrange(hw_link, Time) %>%
mutate(first = dplyr::first(flow_corrected)) %>%
mutate(flow_corrected = flow_corrected/first * 100) %>%
ungroup() %>%
# time-dedicated predictors and offset
mutate(pre_knot = ifelse(Time > SET_KNOT, SET_KNOT, Time),
post_knot = ifelse(Time > SET_KNOT, Time-SET_KNOT, 0)) %>%
relocate(Time_ts, hw_link, Time, pre_knot, post_knot) %>%
mutate(offset_100 = 100)
data_filtered_ts_lmm:) # A tsibble: 60,588 x 27 [1M]
:) # Key: hw_link [10,098]
:) Time_ts hw_link Time pre_knot post_knot H_adm_nm W_commune_nm flow flow_corrected distance[,1] cluster college[,1] H_adm_cd_shp new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol first offset_100
:) <mth> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <fct> <dbl> <chr> <dbl> <dbl> <dbl> <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
:) 1 2021 12 강남구_개포1동 -> Cheongdam 0 0 0 강남구_개포1동 Cheongdam 1630. 100 -1.59 Mixed 2.14 1123068 70977 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 22 22 1630. 100
:) 2 2022 1 강남구_개포1동 -> Cheongdam 1 1 0 강남구_개포1동 Cheongdam 1150. 72.4 -1.59 Mixed 2.14 1123068 52140 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 20 19 1630. 100
:) 3 2022 2 강남구_개포1동 -> Cheongdam 2 2 0 강남구_개포1동 Cheongdam 1226. 79.6 -1.59 Mixed 2.14 1123068 498842 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 19 17 1630. 100
:) 4 2022 3 강남구_개포1동 -> Cheongdam 3 2 1 강남구_개포1동 Cheongdam 1494. 96.2 -1.59 Mixed 2.14 1123068 1983383 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 22 20 1630. 100
:) 5 2022 4 강남구_개포1동 -> Cheongdam 4 2 2 강남구_개포1동 Cheongdam 1624. 99.6 -1.59 Mixed 2.14 1123068 722562 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 20 20 1630. 100
:) 6 2022 5 강남구_개포1동 -> Cheongdam 5 2 3 강남구_개포1동 Cheongdam 1797. 113. -1.59 Mixed 2.14 1123068 135270 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 21 20 1630. 100
:) 7 2021 12 강남구_개포1동 -> Daechi 0 0 0 강남구_개포1동 Daechi 3681. 100 -1.98 Mixed 2.14 1123068 70977 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 22 22 3681. 100
:) 8 2022 1 강남구_개포1동 -> Daechi 1 1 0 강남구_개포1동 Daechi 2642. 73.6 -1.98 Mixed 2.14 1123068 52140 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 20 19 3681. 100
:) 9 2022 2 강남구_개포1동 -> Daechi 2 2 0 강남구_개포1동 Daechi 2053. 59.1 -1.98 Mixed 2.14 1123068 498842 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 19 17 3681. 100
:) 10 2022 3 강남구_개포1동 -> Daechi 3 2 1 강남구_개포1동 Daechi 3107. 88.6 -1.98 Mixed 2.14 1123068 1983383 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 22 20 3681. 100
:) # ℹ 60,578 more rows
9.2.2 EDA
9.3 tab model
tab_model(
wave_2_m4,
wave_4_m4,
wave_5_m4,
show.reflvl = TRUE, show.loglik = FALSE, show.dev = TRUE, show.df = TRUE, show.aic = TRUE,
show.se = TRUE, show.ci = FALSE,
title = "title here",
p.style = "numeric_stars", collapse.se = FALSE,
auto.label = TRUE,
pred.labels = pl,
rm.terms = c("Manufacturing", "Mixed", "Professional", "Financial"),
order.terms = c(1,
2, 4, 6:8, 12:15,
3, 5, 9:11, 16:19),
col.order = c("est", "se", "p"),
dv.labels = c("Early Wave", "Delta Variant Wave", "Omicron Variant Wave"))| Early Wave | Delta Variant Wave | Omicron Variant Wave | |||||||
|---|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | p | Estimates | std. Error | p | Estimates | std. Error | p |
| Intercept | 0.00 | 0.06 | 1.000 | 1.25 *** | 0.09 | <0.001 | -1.71 *** | 0.06 | <0.001 |
| pre-knot | -18.81 *** | 0.25 | <0.001 | -10.11 *** | 0.15 | <0.001 | -15.60 *** | 0.12 | <0.001 |
| distance | -2.30 *** | 0.14 | <0.001 | -0.44 *** | 0.08 | <0.001 | 0.12 | 0.06 | 0.066 |
| Mixed | -2.23 *** | 0.32 | <0.001 | -0.71 *** | 0.18 | <0.001 | -0.27 | 0.15 | 0.071 |
| Professional | -3.04 *** | 0.37 | <0.001 | -1.50 *** | 0.21 | <0.001 | -0.03 | 0.17 | 0.849 |
| Financial | -9.67 *** | 0.41 | <0.001 | -1.80 *** | 0.23 | <0.001 | -0.31 | 0.19 | 0.102 |
| Manufacturing:college | -0.78 ** | 0.25 | 0.002 | -0.49 *** | 0.14 | <0.001 | -0.19 | 0.12 | 0.110 |
| Mixed:college | -3.15 *** | 0.21 | <0.001 | -0.69 *** | 0.12 | <0.001 | -0.20 * | 0.10 | 0.044 |
| Professional:college | -1.92 *** | 0.28 | <0.001 | -0.43 ** | 0.16 | 0.006 | 0.16 | 0.13 | 0.219 |
| Financial:college | -4.11 *** | 0.33 | <0.001 | -0.37 * | 0.18 | 0.045 | 0.14 | 0.15 | 0.355 |
| post-knot | 8.95 *** | 0.17 | <0.001 | 7.57 *** | 0.11 | <0.001 | 6.49 *** | 0.10 | <0.001 |
| distance | 1.25 *** | 0.10 | <0.001 | 0.45 *** | 0.06 | <0.001 | 0.37 *** | 0.06 | <0.001 |
| Mixed | -1.32 *** | 0.22 | <0.001 | 0.62 *** | 0.15 | <0.001 | 0.95 *** | 0.13 | <0.001 |
| Professional | -1.70 *** | 0.26 | <0.001 | 0.15 | 0.17 | 0.385 | 0.69 *** | 0.15 | <0.001 |
| Financial | 4.17 *** | 0.28 | <0.001 | 1.33 *** | 0.19 | <0.001 | 1.89 *** | 0.17 | <0.001 |
| Manufacturing:college | -0.04 | 0.17 | 0.827 | 0.13 | 0.11 | 0.233 | 0.50 *** | 0.10 | <0.001 |
| Mixed:college | 1.24 *** | 0.15 | <0.001 | 0.10 | 0.10 | 0.309 | 0.58 *** | 0.09 | <0.001 |
| Professional:college | 0.08 | 0.20 | 0.672 | -0.55 *** | 0.13 | <0.001 | 0.40 *** | 0.12 | 0.001 |
| Financial:college | 1.41 *** | 0.23 | <0.001 | -0.50 *** | 0.15 | 0.001 | 0.07 | 0.14 | 0.621 |
| Random Effects | |||||||||
| σ2 | 39.96 | 99.95 | 42.36 | ||||||
| τ00 | |||||||||
| τ00 | |||||||||
| τ11 | 124.00 hw_link.pre_knot | 36.62 hw_link.pre_knot | 28.50 hw_link.pre_knot | ||||||
| 56.13 hw_link.1.post_knot | 23.67 hw_link.1.post_knot | 20.33 hw_link.1.post_knot | |||||||
| ρ01 | |||||||||
| ρ01 | |||||||||
| ICC | 0.70 | 0.52 | 0.66 | ||||||
| N | 10098 hw_link | 10098 hw_link | 10098 hw_link | ||||||
| Observations | 40392 | 70686 | 60588 | ||||||
| Marginal R2 / Conditional R2 | 0.369 / 0.810 | 0.331 / 0.681 | 0.453 / 0.812 | ||||||
| Deviance | 301666.369 | 562181.135 | 438301.572 | ||||||
| AIC | 301739.238 | 562271.360 | 438397.121 | ||||||
|
|||||||||
9.4 plot model
wave_2 <- c("2020 7", "2020 8", "2020 9", "2020 10")
wave_4 <- c("2021 6", "2021 7", "2021 8", "2021 9", "2021 10", "2021 11", "2021 12")
wave_5 <- c("2021 12", "2022 1", "2022 2", "2022 3", "2022 4", "2022 5")9.4.1 between
9.4.1.1 wave245
# wave_2
SET_KNOT = 1
p_dec <- ggeffects::ggpredict(wave_2_m4,
terms = c("pre_knot", "cluster"),
condition = c(post_knot = 0, offset = 100)) %>%
tibble() %>%
rename(time = x)
p_rec <- ggeffects::ggpredict(wave_2_m4,
terms = c("post_knot", "cluster"),
condition = c(pre_knot = SET_KNOT, offset = 100)) %>%
tibble() %>%
rename(time = x) %>%
mutate(time = time + SET_KNOT)
p_dec_rec <- p_dec %>%
bind_rows(p_rec)
gg_wave_2 <- ggplot(p_dec_rec, aes(x = time, y = predicted + 100, col = group)) +
geom_line(size = 1.5, alpha = 0.8) +
scale_color_viridis_d(direction = -1) +
ggtitle("Wave 2") +
theme(plot.title = element_text(size= 20),
legend.position = "none",
axis.title.x = element_blank(), axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 15)) +
coord_cartesian(ylim = c(63, 105))
# wave_4
SET_KNOT = 2 # 다시 SET_KNOT = 2로 하기로 함
p_dec <- ggeffects::ggpredict(wave_4_m4,
terms = c("pre_knot", "cluster"),
condition = c(post_knot = 0, offset = 100)) %>%
tibble() %>%
rename(time = x)
p_rec <- ggeffects::ggpredict(wave_4_m4,
terms = c("post_knot", "cluster"),
condition = c(pre_knot = SET_KNOT, offset = 100)) %>%
tibble() %>%
rename(time = x) %>%
mutate(time = time + SET_KNOT)
p_dec_rec <- p_dec %>%
bind_rows(p_rec)
gg_wave_4 <- ggplot(p_dec_rec, aes(x = time, y = predicted + 100, col = group)) +
geom_line(size = 1.5, alpha = 0.8) +
scale_color_viridis_d(direction = -1) +
ggtitle("Wave 4") +
theme(plot.title = element_text(size= 20),
legend.position = "none",
axis.title.x = element_blank(), axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 15)) +
coord_cartesian(ylim = c(63, 105))
# wave_5
SET_KNOT = 2
p_dec <- ggeffects::ggpredict(wave_5_m4,
terms = c("pre_knot", "cluster"),
condition = c(post_knot = 0, offset = 100)) %>%
tibble() %>%
rename(time = x)
p_rec <- ggeffects::ggpredict(wave_5_m4,
terms = c("post_knot", "cluster"),
condition = c(pre_knot = SET_KNOT, offset = 100)) %>%
tibble() %>%
rename(time = x) %>%
mutate(time = time + SET_KNOT)
p_dec_rec <- p_dec %>%
bind_rows(p_rec)
gg_wave_5 <- ggplot(p_dec_rec, aes(x = time, y = predicted + 100, col = group)) +
geom_line(size = 1.5, alpha = 0.8) +
scale_color_viridis_d(direction = -1) +
ggtitle("Wave 5") +
theme(plot.title = element_text(size= 20),
legend.position = "none",
axis.title.x = element_blank(), axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 15)) +
coord_cartesian(ylim = c(63, 105)) 9.4.2 differ by edu level within cluster
9.4.2.1 wave2
:) [1] "2020 7" "2020 8" "2020 9" "2020 10"
mean_sd <- dong.sf_resid_filtered %>%
st_drop_geometry() %>%
summarise(mean = mean(college),
sd = sd(college)) %>%
as.vector()
mean <- mean_sd[1][[1]]
sd <- mean_sd[2][[1]]
REPRE <- c(mean-2*sd, mean-1*sd, mean, mean+1*sd, mean+2*sd)
REPRE <- as.character(round(REPRE*100, 1))
REPRE <- c("14.8% (z = -2)",
"32.2% (z = -1)",
"49.6% (z = 0)",
"67.1% (z = 1)",
"84.5% (z = 2)")
REPRE:) [1] "14.8% (z = -2)" "32.2% (z = -1)" "49.6% (z = 0)" "67.1% (z = 1)" "84.5% (z = 2)"
9.4.2.2 wave4
:) [1] "2021 6" "2021 7" "2021 8" "2021 9" "2021 10" "2021 11" "2021 12"
LABEL = c("2021\n Jun", "2021\n Jul", "2021\n Aug", "2021\n Sep", "2021\n Oct", "2021\n Nov", "2021\n Dec") :) # A tibble: 160 × 7
:) time predicted std.error conf.low conf.high group edu_level
:) <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <fct>
:) 1 0 1.25 0.0902 1.07 1.42 Manufacturing -2
:) 2 0 1.25 0.311 0.637 1.85 Manufacturing -1
:) 3 0 1.25 0.616 0.0374 2.45 Manufacturing 0
:) 4 0 1.25 NA NA NA Manufacturing 1
:) 5 0 1.25 NA NA NA Manufacturing 2
:) 6 0 1.25 NA NA NA Mixed -2
:) 7 0 1.25 NA NA NA Mixed -1
:) 8 0 1.25 NA NA NA Mixed 0
:) 9 0 1.25 NA NA NA Mixed 1
:) 10 0 1.25 NA NA NA Mixed 2
:) # ℹ 150 more rows
9.4.2.2.1 some stats
:) # A tibble: 5 × 7
:) time predicted std.error conf.low conf.high group edu_level
:) <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <fct>
:) 1 6 15.0 NA NA NA Professional -2
:) 2 6 12.0 NA NA NA Professional -1
:) 3 6 8.88 NA NA NA Professional 0
:) 4 6 5.80 NA NA NA Professional 1
:) 5 6 2.72 NA NA NA Professional 2
9.4.2.3 wave5
:) [1] "2021 12" "2022 1" "2022 2" "2022 3" "2022 4" "2022 5"
9.4.2.3.1 some stats
:) # A tibble: 5 × 7
:) time predicted std.error conf.low conf.high group edu_level
:) <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <fct>
:) 1 5 -9.39 NA NA NA Financial -2
:) 2 5 -8.90 NA NA NA Financial -1
:) 3 5 -8.41 NA NA NA Financial 0
:) 4 5 -7.92 NA NA NA Financial 1
:) 5 5 -7.43 NA NA NA Financial 2
10 Credit-card consumption
10.1 import and preprocessing
data_card <- read_csv('data_shinancard/output_0116/data_1113_transformed.csv')
data_card <- data_card %>%
rename(name = member_eng) %>%
mutate(ym = yearmonth(as_date(ym))) %>%
mutate(name = ifelse(name == "Myeong", "Myeongdong", name)) %>%
rename(Time_ts = ym) %>%
as_tsibble(key = c("name", "binary_type"), index = "Time_ts")
name_cluster <- dong.sf_commune_filtered %>%
st_drop_geometry() %>%
distinct(name, cluster) %>%
filter(!cluster %>% is.na())
data_card <- data_card %>%
left_join(name_cluster, by = "name") %>%
filter(!name %>% is.na()) %>%
filter(!cluster %>% is.na())
data_card:) # A tsibble: 540 x 6 [1M]
:) # Key: name, binary_type [45]
:) name binary_type Time_ts amount count cluster
:) <chr> <chr> <mth> <dbl> <dbl> <fct>
:) 1 Cheongdam else 2020 1 100 100 Mixed
:) 2 Cheongdam else 2020 2 79.1 83.9 Mixed
:) 3 Cheongdam else 2020 3 72.8 94.8 Mixed
:) 4 Cheongdam else 2020 4 80.3 94.3 Mixed
:) 5 Cheongdam else 2020 5 88.6 100. Mixed
:) 6 Cheongdam else 2020 6 92.9 99.7 Mixed
:) 7 Cheongdam else 2020 7 100. 100. Mixed
:) 8 Cheongdam else 2020 8 84.5 82.2 Mixed
:) 9 Cheongdam else 2020 9 87.5 85.2 Mixed
:) 10 Cheongdam else 2020 10 86.0 85.6 Mixed
:) # ℹ 530 more rows
10.2 Wave2 시기만
data_card_wave2 <- data_card %>%
filter(as.character(Time_ts) %in% c("2020 7", "2020 8", "2020 9", "2020 10")) %>%
ungroup() %>%
group_by(name, binary_type) %>%
arrange(name, binary_type, Time_ts) %>%
relocate(name, binary_type, Time_ts) %>%
mutate(first = dplyr::first(amount)) %>%
mutate(amount = amount/first * 100) %>%
mutate(first = dplyr::first(count)) %>%
mutate(count = count/first * 100) %>%
dplyr::select(-first)
data_card_wave2 <- data_card_wave2 %>%
filter(name %in% WORK) %>%
filter(binary_type %in% c("foods", "else")) %>%
rename(W_commune_nm = name) %>%
mutate(W_commune_nm = factor(W_commune_nm, levels = WORK))
data_card_wave2:) # A tsibble: 16 x 6 [1M]
:) # Key: W_commune_nm, binary_type [4]
:) # Groups: W_commune_nm, binary_type [4]
:) W_commune_nm binary_type Time_ts amount count cluster
:) <fct> <chr> <mth> <dbl> <dbl> <fct>
:) 1 Myeongdong else 2020 7 100 100 Financial
:) 2 Myeongdong else 2020 8 83.9 75.6 Financial
:) 3 Myeongdong else 2020 9 82.6 76.7 Financial
:) 4 Myeongdong else 2020 10 88.8 82.6 Financial
:) 5 Myeongdong foods 2020 7 100 100 Financial
:) 6 Myeongdong foods 2020 8 73.9 76.8 Financial
:) 7 Myeongdong foods 2020 9 76.3 76.8 Financial
:) 8 Myeongdong foods 2020 10 83.0 82.1 Financial
:) 9 Yeoksam else 2020 7 100 100 Professional
:) 10 Yeoksam else 2020 8 84.4 82.2 Professional
:) 11 Yeoksam else 2020 9 83.5 78.2 Professional
:) 12 Yeoksam else 2020 10 83.2 80.0 Professional
:) 13 Yeoksam foods 2020 7 100 100 Professional
:) 14 Yeoksam foods 2020 8 76.9 77.5 Professional
:) 15 Yeoksam foods 2020 9 76.2 72.1 Professional
:) 16 Yeoksam foods 2020 10 79.9 77.8 Professional
11 Recovery in Wave 2, 4, 5
11.1 set up
11.2 Wave2
11.2.1 set up
# import - window cut
data_filtered_ts_lmm <- data_filtered_ts %>%
filter(as.character(Time_ts) %in% SET_WAVE) %>%
# centered and scaled and zeroed
mutate(distance = scale(distance, center = TRUE, scale = TRUE),
college = scale(college, center = TRUE, scale = TRUE)) %>%
mutate(Time = Time - SET_MINUS_TIME) %>%
# into rate of change
group_by(hw_link) %>%
arrange(hw_link, Time) %>%
mutate(first = dplyr::first(flow_corrected)) %>%
mutate(flow_corrected = flow_corrected/first * 100) %>%
ungroup() %>%
# time-dedicated predictors and offset
mutate(pre_knot = ifelse(Time > SET_KNOT, SET_KNOT, Time),
post_knot = ifelse(Time > SET_KNOT, Time-SET_KNOT, 0)) %>%
relocate(Time_ts, hw_link, Time, pre_knot, post_knot) %>%
mutate(offset_100 = 100) %>%
mutate(inandout = ifelse(!is.na(col_qrt_withinSeoul), "withinSeoul", "outofSeoul")) %>%
mutate(inandout = factor(inandout, levels = c("withinSeoul", "outofSeoul"))) %>%
select(-c(col_qrt_withinSeoul, col_qrt_outofSeoul)) %>%
relocate(Time_ts, H_adm_nm, W_commune_nm, inandout, col_qrt, flow, flow_corrected) %>%
filter(W_commune_nm %in% WORK) %>%
mutate(W_commune_nm = factor(W_commune_nm, levels = WORK))
data_filtered_ts_lmm:) # A tsibble: 6,388 x 26 [1M]
:) # Key: hw_link [1,597]
:) Time_ts H_adm_nm W_commune_nm inandout col_qrt flow flow_corrected hw_link Time pre_knot post_knot distance[,1] cluster college[,1] H_adm_cd_shp new flow_mean adm_cd total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol first offset_100
:) <mth> <chr> <fct> <fct> <fct> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
:) 1 2020 7 강남구_개포1동 Myeongdong withinSeoul high(Q4) 1215. 100 강남구_개포1동 -> Myeong 0 0 0 -0.498 Financial 2.14 1123068 288 965. 1123068 217113 773. 40.1 29.7 22 22 1215. 100
:) 2 2020 8 강남구_개포1동 Myeongdong withinSeoul high(Q4) 649. 54.8 강남구_개포1동 -> Myeong 1 1 0 -0.498 Financial 2.14 1123068 2267 965. 1123068 217113 773. 40.1 29.7 20 19 1215. 100
:) 3 2020 9 강남구_개포1동 Myeongdong withinSeoul high(Q4) 942. 79.5 강남구_개포1동 -> Myeong 2 1 1 -0.498 Financial 2.14 1123068 1424 965. 1123068 217113 773. 40.1 29.7 21 20 1215. 100
:) 4 2020 10 강남구_개포1동 Myeongdong withinSeoul high(Q4) 791. 70.5 강남구_개포1동 -> Myeong 3 1 2 -0.498 Financial 2.14 1123068 719 965. 1123068 217113 773. 40.1 29.7 21 18 1215. 100
:) 5 2020 7 강남구_개포1동 Yeoksam withinSeoul high(Q4) 2937. 100 강남구_개포1동 -> Yeoksam 0 0 0 -1.74 Professional 2.14 1123068 288 2184. 1123068 214306 479. 41.7 29.9 22 22 2937. 100
:) 6 2020 8 강남구_개포1동 Yeoksam withinSeoul high(Q4) 2162. 75.6 강남구_개포1동 -> Yeoksam 1 1 0 -1.74 Professional 2.14 1123068 2267 2184. 1123068 214306 479. 41.7 29.9 20 19 2937. 100
:) 7 2020 9 강남구_개포1동 Yeoksam withinSeoul high(Q4) 2097. 73.2 강남구_개포1동 -> Yeoksam 2 1 1 -1.74 Professional 2.14 1123068 1424 2184. 1123068 214306 479. 41.7 29.9 21 20 2937. 100
:) 8 2020 10 강남구_개포1동 Yeoksam withinSeoul high(Q4) 2235. 82.4 강남구_개포1동 -> Yeoksam 3 1 2 -1.74 Professional 2.14 1123068 719 2184. 1123068 214306 479. 41.7 29.9 21 18 2937. 100
:) 9 2020 7 강남구_개포2동 Myeongdong withinSeoul high(Q4) 2604. 100 강남구_개포2동 -> Myeong 0 0 0 -0.454 Financial 1.94 1123080 288 2107. 1123080 217113 773. 40.1 29.7 22 22 2604. 100
:) 10 2020 8 강남구_개포2동 Myeongdong withinSeoul high(Q4) 1747. 68.9 강남구_개포2동 -> Myeong 1 1 0 -0.454 Financial 1.94 1123080 2267 2107. 1123080 217113 773. 40.1 29.7 20 19 2604. 100
:) # ℹ 6,378 more rows
:) # A tsibble: 64 x 5 [1M]
:) # Key: W_commune_nm, inandout, col_qrt [16]
:) # Groups: W_commune_nm, inandout [4]
:) W_commune_nm inandout col_qrt Time_ts count
:) <fct> <fct> <fct> <mth> <int>
:) 1 Yeoksam withinSeoul low(Q1) 2020 7 41
:) 2 Yeoksam withinSeoul low(Q1) 2020 8 41
:) 3 Yeoksam withinSeoul low(Q1) 2020 9 41
:) 4 Yeoksam withinSeoul low(Q1) 2020 10 41
:) 5 Yeoksam withinSeoul middle-low(Q2) 2020 7 87
:) 6 Yeoksam withinSeoul middle-low(Q2) 2020 8 87
:) 7 Yeoksam withinSeoul middle-low(Q2) 2020 9 87
:) 8 Yeoksam withinSeoul middle-low(Q2) 2020 10 87
:) 9 Yeoksam withinSeoul middle-high(Q3) 2020 7 134
:) 10 Yeoksam withinSeoul middle-high(Q3) 2020 8 134
:) # ℹ 54 more rows
11.2.2 H within and outof Seoul
# within Seoul
data_filtered_ts_lmm_inandout_colqrt <- data_filtered_ts_lmm %>%
filter(as.character(col_qrt) %in% c("low(Q1)", "high(Q4)")) %>%
mutate(col_qrt = factor(col_qrt, levels = c("low(Q1)", "high(Q4)"))) %>%
as_tibble() %>%
group_by(W_commune_nm, inandout, col_qrt, Time_ts) %>%
summarise(weighted_mean_flow = weighted.mean(flow_corrected, weights = flow),
cluster = cluster) %>%
distinct(W_commune_nm, inandout, col_qrt, Time_ts, weighted_mean_flow, cluster) %>%
as_tsibble(key = c(W_commune_nm, inandout, col_qrt), index = Time_ts) %>%
mutate(time = as.numeric(Time_ts) - 606)
data_filtered_ts_lmm_inandout_colqrt:) # A tsibble: 32 x 7 [1M]
:) # Key: W_commune_nm, inandout, col_qrt [8]
:) # Groups: W_commune_nm, inandout, col_qrt @ Time_ts [32]
:) W_commune_nm inandout col_qrt Time_ts weighted_mean_flow cluster time
:) <fct> <fct> <fct> <mth> <dbl> <fct> <dbl>
:) 1 Yeoksam withinSeoul low(Q1) 2020 7 100 Professional 0
:) 2 Yeoksam withinSeoul low(Q1) 2020 8 83.0 Professional 1
:) 3 Yeoksam withinSeoul low(Q1) 2020 9 87.1 Professional 2
:) 4 Yeoksam withinSeoul low(Q1) 2020 10 95.2 Professional 3
:) 5 Yeoksam withinSeoul high(Q4) 2020 7 100 Professional 0
:) 6 Yeoksam withinSeoul high(Q4) 2020 8 81.5 Professional 1
:) 7 Yeoksam withinSeoul high(Q4) 2020 9 79.2 Professional 2
:) 8 Yeoksam withinSeoul high(Q4) 2020 10 89.9 Professional 3
:) 9 Yeoksam outofSeoul low(Q1) 2020 7 100 Professional 0
:) 10 Yeoksam outofSeoul low(Q1) 2020 8 80.9 Professional 1
:) # ℹ 22 more rows
11.2.3 special for wave2
data_card_wave2_edited <- data_card_wave2 %>%
ungroup() %>%
filter(binary_type == "foods") %>%
select(-binary_type) %>%
group_by(W_commune_nm) %>%
mutate(time = as.numeric(Time_ts) - 606)
data_card_wave2_edited:) # A tsibble: 8 x 6 [1M]
:) # Key: W_commune_nm [2]
:) # Groups: W_commune_nm [2]
:) W_commune_nm Time_ts amount count cluster time
:) <fct> <mth> <dbl> <dbl> <fct> <dbl>
:) 1 Myeongdong 2020 7 100 100 Financial 0
:) 2 Myeongdong 2020 8 73.9 76.8 Financial 1
:) 3 Myeongdong 2020 9 76.3 76.8 Financial 2
:) 4 Myeongdong 2020 10 83.0 82.1 Financial 3
:) 5 Yeoksam 2020 7 100 100 Professional 0
:) 6 Yeoksam 2020 8 76.9 77.5 Professional 1
:) 7 Yeoksam 2020 9 76.2 72.1 Professional 2
:) 8 Yeoksam 2020 10 79.9 77.8 Professional 3
11.2.4 figure 8
data_filtered_ts_lmm_inandout_colqrt <- data_filtered_ts_lmm_inandout_colqrt %>%
mutate(col_qrt = ifelse(col_qrt == "low(Q1)", "low\n(14% to 35%)", "high\n(63% to 95%)")) 11.2.5 only for legend in figure 8
:) # A tsibble: 8 x 6 [1M]
:) # Key: W_commune_nm, name [2]
:) # Groups: W_commune_nm [2]
:) W_commune_nm Time_ts cluster time name value
:) <fct> <mth> <fct> <dbl> <chr> <dbl>
:) 1 Myeongdong 2020 7 Financial 0 amount 100
:) 2 Myeongdong 2020 8 Financial 1 amount 73.9
:) 3 Myeongdong 2020 9 Financial 2 amount 76.3
:) 4 Myeongdong 2020 10 Financial 3 amount 83.0
:) 5 Yeoksam 2020 7 Professional 0 amount 100
:) 6 Yeoksam 2020 8 Professional 1 amount 76.9
:) 7 Yeoksam 2020 9 Professional 2 amount 76.2
:) 8 Yeoksam 2020 10 Professional 3 amount 79.9
12 Clean your RAM
13 Mapping
13.1 set up
13.2 centroid only shp as a prerequisite
sf_1 <- dong.sf_commune_filtered %>%
st_centroid() %>%
select(name, geometry, cluster) %>%
rename(adm_commune_nm = name)
# st_crs(sf_1)
sf_2 <- dong.sf_resid_filtered %>%
st_centroid() %>%
select(adm_nm, geometry, college, col_qrt, col_qrt_withinSeoul, col_qrt_outofSeoul) %>%
rename(adm_commune_nm = adm_nm)
# st_crs(sf_2)
dong.sf_cent_for_line <- sf_1 %>%
bind_rows(sf_2)
dong.sf_cent_for_line:) Simple feature collection with 928 features and 6 fields
:) Geometry type: POINT
:) Dimension: XY
:) Bounding box: xmin: 908000 ymin: 1890000 xmax: 1000000 ymax: 1990000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 928 × 7
:) adm_commune_nm geometry cluster college col_qrt col_qrt_withinSeoul col_qrt_outofSeoul
:) * <chr> <POINT [m]> <fct> <dbl> <fct> <fct> <fct>
:) 1 Cheongdam (959549 1947085) Mixed NA <NA> <NA> <NA>
:) 2 Daechi (960758 1943106) Mixed NA <NA> <NA> <NA>
:) 3 Guro (945735 1943823) Manufacturing NA <NA> <NA> <NA>
:) 4 Gwanghui (956310 1951256) Manufacturing NA <NA> <NA> <NA>
:) 5 Jamsil (964079 1945649) Mixed NA <NA> <NA> <NA>
:) 6 Jongno (955017 1953704) Mixed NA <NA> <NA> <NA>
:) 7 Munjeong (967652 1943251) Manufacturing NA <NA> <NA> <NA>
:) 8 Myeongdong (953957 1951512) Financial NA <NA> <NA> <NA>
:) 9 Samseong (961392 1945542) Professional NA <NA> <NA> <NA>
:) 10 Seocho (956929 1943081) Professional NA <NA> <NA> <NA>
:) # ℹ 918 more rows
13.3 wave2 flow data into linestring shp
SET_WAVE = wave_2
linestring.sf <- data_filtered_ts %>%
filter(as.character(Time_ts) %in% SET_WAVE) %>%
group_by(hw_link) %>%
mutate(ROC = flow_corrected / first(flow_corrected) * 100) %>%
mutate(ROCC = ROC - first(ROC)) %>%
relocate(H_adm_nm, W_commune_nm, ROC, ROCC, flow_corrected) %>%
filter(Time == 8) %>%
tibble() %>%
ungroup() %>%
relocate(H_adm_nm, W_commune_nm, ROCC)
linestring.sf:) # A tibble: 10,098 × 25
:) H_adm_nm W_commune_nm ROCC ROC flow_corrected Time_ts flow distance cluster college H_adm_cd_shp Time hw_link new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol
:) <chr> <chr> <dbl> <dbl> <dbl> <mth> <dbl> <dbl> <fct> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
:) 1 강남구_개포1동 Cheongdam -19.6 80.4 1622. 2020 8 1581. 17.7 Mixed 0.889 1123068 8 강남구_개포1동 -> Cheongdam 2267 1583. 1123068 high(Q4) high(Q4) <NA> 155890 212. 27.6 38.8 20 19
:) 2 강남구_개포1동 Daechi -17.3 82.7 2962. 2020 8 2886. 14.9 Mixed 0.889 1123068 8 강남구_개포1동 -> Daechi 2267 3098. 1123068 high(Q4) high(Q4) <NA> 116633 116. 29.2 31.3 20 19
:) 3 강남구_개포1동 Guro -16.1 83.9 397. 2020 8 387. 29.3 Manufacturing 0.889 1123068 8 강남구_개포1동 -> Guro 2267 322. 1123068 high(Q4) high(Q4) <NA> 336770 304. 36.3 25.9 20 19
:) 4 강남구_개포1동 Gwanghui -8.54 91.5 249. 2020 8 242. 25.1 Manufacturing 0.889 1123068 8 강남구_개포1동 -> Gwanghui 2267 356. 1123068 high(Q4) high(Q4) <NA> 159615 239. 14.8 44.7 20 19
:) 5 강남구_개포1동 Jamsil 5.34 105. 681. 2020 8 664. 18.1 Mixed 0.889 1123068 8 강남구_개포1동 -> Jamsil 2267 615. 1123068 high(Q4) high(Q4) <NA> 123030 102. 27.1 40.1 20 19
:) 6 강남구_개포1동 Jongno -46.8 53.2 500. 2020 8 487. 25.2 Mixed 0.889 1123068 8 강남구_개포1동 -> Jongno 2267 755. 1123068 high(Q4) high(Q4) <NA> 129040 203. 28.6 32.9 20 19
:) 7 강남구_개포1동 Munjeong -38.0 62.0 358. 2020 8 349. 20.9 Manufacturing 0.889 1123068 8 강남구_개포1동 -> Munjeong 2267 549. 1123068 high(Q4) high(Q4) <NA> 110472 98.5 24.9 32.7 20 19
:) 8 강남구_개포1동 Myeongdong -45.2 54.8 666. 2020 8 649. 25.8 Financial 0.889 1123068 8 강남구_개포1동 -> Myeong 2267 965. 1123068 high(Q4) high(Q4) <NA> 217113 773. 40.1 29.7 20 19
:) 9 강남구_개포1동 Samseong -21.5 78.5 1172. 2020 8 1142. 16.9 Professional 0.889 1123068 8 강남구_개포1동 -> Samseong 2267 1255. 1123068 high(Q4) high(Q4) <NA> 120988 320. 31.0 37.8 20 19
:) 10 강남구_개포1동 Seocho -26.8 73.2 640. 2020 8 623. 17.9 Professional 0.889 1123068 8 강남구_개포1동 -> Seocho 2267 791. 1123068 high(Q4) high(Q4) <NA> 103264 236. 35.7 25.2 20 19
:) # ℹ 10,088 more rows
:) Simple feature collection with 10098 features and 25 fields
:) Geometry type: LINESTRING
:) Dimension: XY
:) Bounding box: xmin: 908000 ymin: 1890000 xmax: 1000000 ymax: 1990000
:) Projected CRS: Korea 2000 / Unified CS
:) First 10 features:
:) H_adm_nm W_commune_nm ROCC ROC flow_corrected Time_ts flow distance cluster college H_adm_cd_shp Time hw_link new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol geometry
:) 1 강남구_개포1동 Cheongdam -19.59 80.4 1622 2020 8 1581 17.7 Mixed 0.889 1123068 8 강남구_개포1동 -> Cheongdam 2267 1583 1123068 high(Q4) high(Q4) <NA> 155890 211.5 27.6 38.8 20 19 LINESTRING (961302 1942151,...
:) 2 강남구_개포1동 Daechi -17.32 82.7 2962 2020 8 2886 14.9 Mixed 0.889 1123068 8 강남구_개포1동 -> Daechi 2267 3098 1123068 high(Q4) high(Q4) <NA> 116633 116.2 29.2 31.3 20 19 LINESTRING (961302 1942151,...
:) 3 강남구_개포1동 Guro -16.10 83.9 397 2020 8 387 29.3 Manufacturing 0.889 1123068 8 강남구_개포1동 -> Guro 2267 322 1123068 high(Q4) high(Q4) <NA> 336770 303.6 36.3 25.9 20 19 LINESTRING (961302 1942151,...
:) 4 강남구_개포1동 Gwanghui -8.54 91.5 249 2020 8 242 25.1 Manufacturing 0.889 1123068 8 강남구_개포1동 -> Gwanghui 2267 356 1123068 high(Q4) high(Q4) <NA> 159615 238.6 14.8 44.7 20 19 LINESTRING (961302 1942151,...
:) 5 강남구_개포1동 Jamsil 5.34 105.3 681 2020 8 664 18.1 Mixed 0.889 1123068 8 강남구_개포1동 -> Jamsil 2267 615 1123068 high(Q4) high(Q4) <NA> 123030 101.7 27.1 40.1 20 19 LINESTRING (961302 1942151,...
:) 6 강남구_개포1동 Jongno -46.79 53.2 500 2020 8 487 25.2 Mixed 0.889 1123068 8 강남구_개포1동 -> Jongno 2267 755 1123068 high(Q4) high(Q4) <NA> 129040 203.3 28.6 32.9 20 19 LINESTRING (961302 1942151,...
:) 7 강남구_개포1동 Munjeong -37.97 62.0 358 2020 8 349 20.9 Manufacturing 0.889 1123068 8 강남구_개포1동 -> Munjeong 2267 549 1123068 high(Q4) high(Q4) <NA> 110472 98.5 24.9 32.7 20 19 LINESTRING (961302 1942151,...
:) 8 강남구_개포1동 Myeongdong -45.16 54.8 666 2020 8 649 25.8 Financial 0.889 1123068 8 강남구_개포1동 -> Myeong 2267 965 1123068 high(Q4) high(Q4) <NA> 217113 772.9 40.1 29.7 20 19 LINESTRING (961302 1942151,...
:) 9 강남구_개포1동 Samseong -21.52 78.5 1172 2020 8 1142 16.9 Professional 0.889 1123068 8 강남구_개포1동 -> Samseong 2267 1255 1123068 high(Q4) high(Q4) <NA> 120988 320.1 31.0 37.8 20 19 LINESTRING (961302 1942151,...
:) 10 강남구_개포1동 Seocho -26.77 73.2 640 2020 8 623 17.9 Professional 0.889 1123068 8 강남구_개포1동 -> Seocho 2267 791 1123068 high(Q4) high(Q4) <NA> 103264 235.9 35.7 25.2 20 19 LINESTRING (961302 1942151,...
13.4 By clusters
13.4.1 Professional
:) # A tibble: 2 × 2
:) RCC_quart mean_ROCC
:) <fct> <dbl>
:) 1 largest -31.7
:) 2 smallest -7.51
a <- linestring.sf %>%
filter(H_adm_nm != "중구_운서동") %>%
filter(flow_corrected > quantile(flow_corrected, THRESHOLD)) %>%
filter(as.character(cluster) == CLUST)
summary(a$ROCC):) Min. 1st Qu. Median Mean 3rd Qu. Max.
:) -56.5 -27.1 -20.7 -19.9 -13.3 29.5
:) Simple feature collection with 1 feature and 26 fields
:) Geometry type: LINESTRING
:) Dimension: XY
:) Bounding box: xmin: 957000 ymin: 1890000 xmax: 966000 ymax: 1940000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 1 × 27
:) H_adm_nm W_commune_nm ROCC ROC flow_corrected Time_ts flow distance cluster college H_adm_cd_shp Time hw_link new flow_mean adm_cd col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol geometry RCC_quart
:) * <chr> <chr> <dbl> <dbl> <dbl> <mth> <dbl> <dbl> <fct> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <LINESTRING [m]> <fct>
:) 1 평택시_비전2동 Seocho 6.65 107. 505. 2020 8 492. 40.7 Professional 0.465 3107065 8 평택시_비전2동 -> Seocho 2267 447. 3107065 middle-low(Q2) <NA> middle-high(Q3) 103264 236. 35.7 25.2 20 19 (966170 1888086, 956929 1943081) smallest
bounding_box <- st_bbox(flow.sf)
a.sf <- dong.sf_commune_filtered %>%
select(name, geometry, cluster) %>%
rename(adm_commune_nm = name) %>%
bind_rows(sf_2) %>%
filter(as.character(cluster) == CLUST)
b_1 <- flow.sf %>% ungroup() %>% pull(H_adm_nm)
b_2 <- flow.sf %>% st_drop_geometry() %>% select(H_adm_nm, ROCC, RCC_quart)
b.sf <- dong.sf_cent_for_line %>%
filter(adm_commune_nm %in% b_1) %>%
left_join(b_2, by = c("adm_commune_nm" = "H_adm_nm")) %>%
select(adm_commune_nm, geometry, RCC_quart) %>%
distinct(adm_commune_nm, geometry, RCC_quart)
rm(b_1, b_2)
b.sf:) Simple feature collection with 671 features and 2 fields
:) Geometry type: POINT
:) Dimension: XY
:) Bounding box: xmin: 917000 ymin: 1900000 xmax: 990000 ymax: 1980000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 671 × 3
:) adm_commune_nm geometry RCC_quart
:) <chr> <POINT [m]> <fct>
:) 1 종로구_사직동 (953233 1952856) smallest
:) 2 종로구_사직동 (953233 1952856) largest
:) 3 종로구_부암동 (952644 1955419) smallest
:) 4 종로구_평창동 (953060 1957494) largest
:) 5 종로구_무악동 (952193 1953219) largest
:) 6 종로구_교남동 (952678 1952545) smallest
:) 7 종로구_종로1.2.3.4가동 (954954 1952990) largest
:) 8 종로구_이화동 (956119 1953487) largest
:) 9 종로구_창신2동 (956749 1953074) largest
:) 10 종로구_숭인2동 (957645 1952899) smallest
:) # ℹ 661 more rows
s_1 <- b.sf %>%
st_drop_geometry() %>%
select(adm_commune_nm, RCC_quart)
s_2 <- dong.sf_resid_filtered %>%
st_drop_geometry() %>%
select(adm_cd, adm_nm, col_qrt) %>%
mutate(inout = ifelse(str_sub(adm_cd, 1, 2) == "11", "within", "out of")) %>%
mutate(inout = factor(inout))
s_prof <- s_1 %>%
left_join(s_2, by = c("adm_commune_nm" = "adm_nm"))
s_prof:) # A tibble: 671 × 5
:) adm_commune_nm RCC_quart adm_cd col_qrt inout
:) <chr> <fct> <chr> <fct> <fct>
:) 1 종로구_사직동 smallest 1101053 high(Q4) within
:) 2 종로구_사직동 largest 1101053 high(Q4) within
:) 3 종로구_부암동 smallest 1101055 high(Q4) within
:) 4 종로구_평창동 largest 1101056 high(Q4) within
:) 5 종로구_무악동 largest 1101057 high(Q4) within
:) 6 종로구_교남동 smallest 1101058 high(Q4) within
:) 7 종로구_종로1.2.3.4가동 largest 1101061 high(Q4) within
:) 8 종로구_이화동 largest 1101064 high(Q4) within
:) 9 종로구_창신2동 largest 1101068 low(Q1) within
:) 10 종로구_숭인2동 smallest 1101071 middle-low(Q2) within
:) # ℹ 661 more rows
13.4.2 Financial
:) # A tibble: 2 × 2
:) RCC_quart mean_ROCC
:) <fct> <dbl>
:) 1 largest -39.1
:) 2 smallest -16.1
a <- linestring.sf %>%
filter(H_adm_nm != "중구_운서동") %>%
filter(flow_corrected > quantile(flow_corrected, THRESHOLD)) %>%
filter(as.character(cluster) == CLUST)
summary(a$ROCC):) Min. 1st Qu. Median Mean 3rd Qu. Max.
:) -59.2 -36.6 -29.2 -28.6 -21.7 32.7
bounding_box <- st_bbox(flow.sf)
a.sf <- dong.sf_commune_filtered %>%
select(name, geometry, cluster) %>%
rename(adm_commune_nm = name) %>%
bind_rows(sf_2) %>%
filter(as.character(cluster) == CLUST)
b_1 <- flow.sf %>% ungroup() %>% pull(H_adm_nm)
b_2 <- flow.sf %>% st_drop_geometry() %>% select(H_adm_nm, ROCC, RCC_quart)
b.sf <- dong.sf_cent_for_line %>%
filter(adm_commune_nm %in% b_1) %>%
left_join(b_2, by = c("adm_commune_nm" = "H_adm_nm")) %>%
select(adm_commune_nm, geometry, RCC_quart) %>%
distinct(adm_commune_nm, geometry, RCC_quart)
rm(b_1, b_2)
b.sf:) Simple feature collection with 516 features and 2 fields
:) Geometry type: POINT
:) Dimension: XY
:) Bounding box: xmin: 917000 ymin: 1910000 xmax: 984000 ymax: 1980000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 516 × 3
:) adm_commune_nm geometry RCC_quart
:) <chr> <POINT [m]> <fct>
:) 1 종로구_부암동 (952644 1955419) largest
:) 2 종로구_평창동 (953060 1957494) largest
:) 3 종로구_교남동 (952678 1952545) largest
:) 4 종로구_가회동 (954666 1953805) largest
:) 5 종로구_이화동 (956119 1953487) smallest
:) 6 종로구_창신2동 (956749 1953074) smallest
:) 7 종로구_창신3동 (957019 1953397) smallest
:) 8 종로구_숭인1동 (957342 1953199) largest
:) 9 중구_회현동 (953816 1950972) smallest
:) 10 중구_명동 (954456 1951746) smallest
:) # ℹ 506 more rows
:) Simple feature collection with 2 features and 2 fields
:) Geometry type: MULTIPOINT
:) Dimension: XY
:) Bounding box: xmin: 917000 ymin: 1910000 xmax: 984000 ymax: 1980000
:) Projected CRS: Korea 2000 / Unified CS
:) # A tibble: 2 × 3
:) RCC_quart n geometry
:) * <fct> <int> <MULTIPOINT [m]>
:) 1 largest 254 ((922798 1960611), (923895 1933876), (924095 1948593), (924943 1929695), (926189 19612...
:) 2 smallest 262 ((917380 1944112), (925381 1948282), (925938 1941909), (926381 1952180), (926532 19406...
s_1 <- b.sf %>%
st_drop_geometry() %>%
select(adm_commune_nm, RCC_quart)
s_2 <- dong.sf_resid_filtered %>%
st_drop_geometry() %>%
select(adm_cd, adm_nm, col_qrt) %>%
mutate(inout = ifelse(str_sub(adm_cd, 1, 2) == "11", "within", "out of")) %>%
mutate(inout = factor(inout))
s_finan <- s_1 %>%
left_join(s_2, by = c("adm_commune_nm" = "adm_nm"))
s_finan:) # A tibble: 516 × 5
:) adm_commune_nm RCC_quart adm_cd col_qrt inout
:) <chr> <fct> <chr> <fct> <fct>
:) 1 종로구_부암동 largest 1101055 high(Q4) within
:) 2 종로구_평창동 largest 1101056 high(Q4) within
:) 3 종로구_교남동 largest 1101058 high(Q4) within
:) 4 종로구_가회동 largest 1101060 middle-high(Q3) within
:) 5 종로구_이화동 smallest 1101064 high(Q4) within
:) 6 종로구_창신2동 smallest 1101068 low(Q1) within
:) 7 종로구_창신3동 smallest 1101069 middle-high(Q3) within
:) 8 종로구_숭인1동 largest 1101070 middle-low(Q2) within
:) 9 중구_회현동 smallest 1102054 high(Q4) within
:) 10 중구_명동 smallest 1102055 high(Q4) within
:) # ℹ 506 more rows
13.4.3 overall stat
s_prof_finan <- s_prof %>%
bind_rows(s_finan) %>%
mutate(inout = case_when(as.character(inout) == "out of" ~ "out of Seoul",
as.character(inout) == "within" ~ "within Seoul")) %>%
mutate(clus = case_when(as.character(clus) == "prof" ~ "Professional",
as.character(clus) == "finan" ~ "Financial")) %>%
mutate(clus = factor(clus, levels = c("Professional", "Financial")))
s_prof_finan :) # A tibble: 1,187 × 6
:) adm_commune_nm RCC_quart adm_cd col_qrt inout clus
:) <chr> <fct> <chr> <fct> <chr> <fct>
:) 1 종로구_사직동 smallest 1101053 high(Q4) within Seoul Professional
:) 2 종로구_사직동 largest 1101053 high(Q4) within Seoul Professional
:) 3 종로구_부암동 smallest 1101055 high(Q4) within Seoul Professional
:) 4 종로구_평창동 largest 1101056 high(Q4) within Seoul Professional
:) 5 종로구_무악동 largest 1101057 high(Q4) within Seoul Professional
:) 6 종로구_교남동 smallest 1101058 high(Q4) within Seoul Professional
:) 7 종로구_종로1.2.3.4가동 largest 1101061 high(Q4) within Seoul Professional
:) 8 종로구_이화동 largest 1101064 high(Q4) within Seoul Professional
:) 9 종로구_창신2동 largest 1101068 low(Q1) within Seoul Professional
:) 10 종로구_숭인2동 smallest 1101071 middle-low(Q2) within Seoul Professional
:) # ℹ 1,177 more rows
s_prof_finan %>%
select(clus, RCC_quart, inout, col_qrt) %>%
gtsummary::tbl_strata(
strata = clus,
.tbl_fun =
~ .x %>%
tbl_strata(
strata = RCC_quart,
.tbl_fun =
~ .x %>%
tbl_summary(by = inout),
#.header = "**{strata} decrease**, N={n}"
),
.header = "**{strata} decrease**, N={n}") %>%
gtsummary::bold_labels()| Characteristic | Professional decrease, N=671 | Financial decrease, N=516 | ||||||
|---|---|---|---|---|---|---|---|---|
| out of Seoul, N = 1871 | within Seoul, N = 1541 | out of Seoul, N = 1081 | within Seoul, N = 2221 | out of Seoul, N = 1281 | within Seoul, N = 1261 | out of Seoul, N = 921 | within Seoul, N = 1701 | |
| col_qrt | ||||||||
| low(Q1) | 29 (16%) | 12 (7.8%) | 33 (31%) | 23 (10%) | 8 (6.3%) | 7 (5.6%) | 32 (35%) | 29 (17%) |
| middle-low(Q2) | 53 (28%) | 22 (14%) | 42 (39%) | 55 (25%) | 21 (16%) | 11 (8.7%) | 40 (43%) | 46 (27%) |
| middle-high(Q3) | 53 (28%) | 56 (36%) | 19 (18%) | 64 (29%) | 47 (37%) | 32 (25%) | 15 (16%) | 65 (38%) |
| high(Q4) | 52 (28%) | 64 (42%) | 14 (13%) | 80 (36%) | 52 (41%) | 76 (60%) | 5 (5.4%) | 30 (18%) |
| 1 n (%) | ||||||||
s_prof_finan %>%
select(clus, RCC_quart, inout, col_qrt) %>%
gtsummary::tbl_strata(strata = clus,
.tbl_fun = ~ .x %>%
tbl_summary(by = RCC_quart),
.header = "**{strata} decrease**, N={n}") %>%
gtsummary::bold_labels()| Characteristic | Professional decrease, N=671 | Financial decrease, N=516 | ||
|---|---|---|---|---|
| largest, N = 3411 | smallest, N = 3301 | largest, N = 2541 | smallest, N = 2621 | |
| inout | ||||
| out of Seoul | 187 (55%) | 108 (33%) | 128 (50%) | 92 (35%) |
| within Seoul | 154 (45%) | 222 (67%) | 126 (50%) | 170 (65%) |
| col_qrt | ||||
| low(Q1) | 41 (12%) | 56 (17%) | 15 (5.9%) | 61 (23%) |
| middle-low(Q2) | 75 (22%) | 97 (29%) | 32 (13%) | 86 (33%) |
| middle-high(Q3) | 109 (32%) | 83 (25%) | 79 (31%) | 80 (31%) |
| high(Q4) | 116 (34%) | 94 (28%) | 128 (50%) | 35 (13%) |
| 1 n (%) | ||||